From 2e46c414481269c2f928da6d2e439d9cf895febd Mon Sep 17 00:00:00 2001
From: Peyton Lee <peytolee@amd.com>
Date: Tue, 25 Feb 2025 16:51:21 +0800
Subject: [PATCH] amd/gmlib: add gmlib for radeonsi

radeonsi drivers can use gmlib to generate the 3D LUT used for tone mapping.

Signed-off-by: Peyton Lee <peytolee@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33731>
---
 src/amd/gmlib/README.md                       |    1 +
 .../ToneMapGenerator/inc/ToneMapGenerator.h   |   45 +
 .../gmlib/ToneMapGenerator/inc/ToneMapTypes.h |   73 +
 .../ToneMapGenerator/src/inc/AGMGenerator.h   |   39 +
 .../ToneMapGenerator/src/inc/CSCGenerator.h   |  176 ++
 .../ToneMapGenerator/src/src/AGMGenerator.c   |  261 +++
 .../src/src/ToneMapGenerator.c                |  354 ++++
 src/amd/gmlib/gm/cs_funcs.c                   | 1418 ++++++++++++++++
 src/amd/gmlib/gm/cs_funcs.h                   |  273 +++
 src/amd/gmlib/gm/csc_api_funcs.c              |   75 +
 src/amd/gmlib/gm/csc_api_funcs.h              |   41 +
 src/amd/gmlib/gm/csc_funcs.c                  |   56 +
 src/amd/gmlib/gm/csc_funcs.h                  |   41 +
 src/amd/gmlib/gm/cvd_api_funcs.c              |   85 +
 src/amd/gmlib/gm/cvd_api_funcs.h              |   42 +
 src/amd/gmlib/gm/cvd_funcs.c                  |  132 ++
 src/amd/gmlib/gm/cvd_funcs.h                  |   57 +
 src/amd/gmlib/gm/gm_api_funcs.c               |  194 +++
 src/amd/gmlib/gm/gm_api_funcs.h               |   79 +
 src/amd/gmlib/gm/gm_funcs.c                   | 1492 +++++++++++++++++
 src/amd/gmlib/gm/gm_funcs.h                   |  299 ++++
 src/amd/gmlib/gm/mat_funcs.c                  |  918 ++++++++++
 src/amd/gmlib/gm/mat_funcs.h                  |  143 ++
 src/amd/gmlib/meson.build                     |   65 +
 src/amd/gmlib/tonemap_adaptor.c               |   78 +
 src/amd/gmlib/tonemap_adaptor.h               |   33 +
 src/amd/meson.build                           |    1 +
 27 files changed, 6471 insertions(+)
 create mode 100755 src/amd/gmlib/README.md
 create mode 100755 src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h
 create mode 100755 src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h
 create mode 100755 src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h
 create mode 100755 src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h
 create mode 100755 src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c
 create mode 100755 src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c
 create mode 100755 src/amd/gmlib/gm/cs_funcs.c
 create mode 100755 src/amd/gmlib/gm/cs_funcs.h
 create mode 100755 src/amd/gmlib/gm/csc_api_funcs.c
 create mode 100755 src/amd/gmlib/gm/csc_api_funcs.h
 create mode 100755 src/amd/gmlib/gm/csc_funcs.c
 create mode 100755 src/amd/gmlib/gm/csc_funcs.h
 create mode 100755 src/amd/gmlib/gm/cvd_api_funcs.c
 create mode 100755 src/amd/gmlib/gm/cvd_api_funcs.h
 create mode 100755 src/amd/gmlib/gm/cvd_funcs.c
 create mode 100755 src/amd/gmlib/gm/cvd_funcs.h
 create mode 100755 src/amd/gmlib/gm/gm_api_funcs.c
 create mode 100755 src/amd/gmlib/gm/gm_api_funcs.h
 create mode 100755 src/amd/gmlib/gm/gm_funcs.c
 create mode 100755 src/amd/gmlib/gm/gm_funcs.h
 create mode 100755 src/amd/gmlib/gm/mat_funcs.c
 create mode 100755 src/amd/gmlib/gm/mat_funcs.h
 create mode 100755 src/amd/gmlib/meson.build
 create mode 100755 src/amd/gmlib/tonemap_adaptor.c
 create mode 100755 src/amd/gmlib/tonemap_adaptor.h

diff --git a/src/amd/gmlib/README.md b/src/amd/gmlib/README.md
new file mode 100755
index 00000000000..8f84f009122
--- /dev/null
+++ b/src/amd/gmlib/README.md
@@ -0,0 +1 @@
+# GMLib
\ No newline at end of file
diff --git a/src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h
new file mode 100755
index 00000000000..228ec295889
--- /dev/null
+++ b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapGenerator.h
@@ -0,0 +1,45 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#pragma once
+#include "ToneMapTypes.h"
+#include "AGMGenerator.h"
+
+struct SrcTmParams { /* source-side inputs of the tone map generator */
+    struct ToneMapHdrMetaData    streamMetaData;      /* HDR metadata of the input stream */
+    enum ToneMapTransferFunction inputContainerGamma; /* transfer function of the input container */
+};
+
+struct DstTmParams { /* destination-side inputs of the tone map generator */
+    struct ToneMapHdrMetaData    dstMetaData;              /* HDR metadata of the output target */
+    enum ToneMapTransferFunction outputContainerGamma;     /* transfer function of the output container */
+    enum ToneMapColorPrimaries   outputContainerPrimaries; /* color primaries of the output container */
+};
+
+struct ToneMapGenerator { /* top-level generator state; embeds the AGM generator by value */
+    struct AGMGenerator agmGenerator;
+    enum ToneMapAlgorithm tmAlgo; /* algorithm selector, see enum ToneMapAlgorithm */
+    bool memAllocSet; /* NOTE(review): presumably set by ToneMapGenerator_SetInternalAllocators - confirm */
+    struct SrcTmParams cachedSrcTmParams; /* NOTE(review): looks like the last-used source inputs - confirm */
+    struct DstTmParams cachedDstTmParams; /* NOTE(review): looks like the last-used destination inputs - confirm */
+};
+
+enum TMGReturnCode ToneMapGenerator_GenerateToneMappingParameters( /* result is one of the TMG_RET_* codes */
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData, /* HDR metadata of the input stream */
+    const struct ToneMapHdrMetaData* dstMetaData,    /* HDR metadata of the output target */
+    enum ToneMapTransferFunction inputContainerGamma,
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries  outputContainerPrimaries,
+    unsigned short lutDim, /* NOTE(review): presumably the number of points per 3D LUT axis - confirm */
+    struct ToneMappingParameters* tmParams); /* NOTE(review): presumably filled in by the call - confirm */
+
+enum TMGReturnCode ToneMapGenerator_SetInternalAllocators( /* result is one of the TMG_RET_* codes */
+    struct ToneMapGenerator* p_tmGenerator,
+    TMGAlloc                 allocFunc, /* allocation callback, see TMGAlloc */
+    TMGFree                  freeFunc,  /* release callback, see TMGFree */
+    void*                    memCtx);   /* opaque pointer; NOTE(review): presumably handed back to the callbacks - confirm */
diff --git a/src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h
new file mode 100755
index 00000000000..ad08b6942b4
--- /dev/null
+++ b/src/amd/gmlib/ToneMapGenerator/inc/ToneMapTypes.h
@@ -0,0 +1,73 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#pragma once
+#include <stdbool.h>
+
+#define MAX_LUMINANCE 10000.0 /* pq_norm used by the generators when the transfer function is not TMG_TF_NormalizedPQ */
+#define INPUT_NORMALIZATION_FACTOR 4000 //nits
+typedef void* (*TMGAlloc)(unsigned int, void*); /* NOTE(review): presumably (size, memCtx) - confirm */
+typedef void (*TMGFree)(void*, void*);          /* NOTE(review): presumably (ptr, memCtx) - confirm */
+
+struct ToneMapHdrMetaData /* the X/Y fields are integers that the generators divide by 50000.0 */
+{
+    unsigned short  redPrimaryX;
+    unsigned short  redPrimaryY;
+    unsigned short  greenPrimaryX;
+    unsigned short  greenPrimaryY;
+    unsigned short  bluePrimaryX;
+    unsigned short  bluePrimaryY;
+    unsigned short  whitePointX;
+    unsigned short  whitePointY;
+    unsigned int    maxMasteringLuminance; /* copied (as double) into luminance_limits[1] by the generators */
+    unsigned int    minMasteringLuminance;
+    unsigned short  maxContentLightLevel;
+    unsigned short  maxFrameAverageLightLevel;
+};
+
+enum ToneMapTransferFunction { /* notes give the cs_gamma_type chosen by TranslateTfEnum() */
+    TMG_TF_SRGB,         /* EGT_sRGB */
+    TMG_TF_BT709,        /* EGT_709 */
+    TMG_TF_G24,          /* EGT_2_4 */
+    TMG_TF_PQ,           /* EGT_PQ */
+    TMG_TF_NormalizedPQ, /* EGT_PQ, with pq_norm taken from inputNormalizationFactor */
+    TMG_TF_ModifiedPQ,   /* no mapping: TranslateTfEnum() returns false */
+    TMG_TF_Linear,       /* no mapping: TranslateTfEnum() returns false */
+    TMG_TF_HLG           /* EGT_HLG */
+};
+
+enum ToneMapColorPrimaries { /* named primaries sets; type of lutColorIn, lutColorOut and outputContainerPrimaries */
+    TMG_CP_BT601,
+    TMG_CP_BT709,
+    TMG_CP_BT2020,
+    TMG_CP_DCIP3
+};
+
+enum ToneMapAlgorithm { /* notes describe what AGMGenerator_SetAgmOptions() selects */
+    TMG_A_AGM,     /* gamut_map_mode EGMM_TM_CHTO with hue_rot_mode EHRM_HR */
+    TMG_A_BT2390,  /* gamut_map_mode EGMM_TM with hue_rot_mode EHRM_NONE */
+    TMG_A_BT2390_4 /* gamut_map_mode EGMM_TM with hue_rot_mode EHRM_NONE */
+};
+
+struct ToneMappingParameters {
+    enum ToneMapColorPrimaries   lutColorIn;
+    enum ToneMapColorPrimaries   lutColorOut;
+    enum ToneMapTransferFunction shaperTf; /* translated to the source gamma type by AGMGenerator_SetAgmOptions() */
+    enum ToneMapTransferFunction lutOutTf; /* translated to the destination gamma type by AGMGenerator_SetAgmOptions() */
+    unsigned short               lutDim;   /* handed to gmlib as num_pnts_3dlut */
+    unsigned short*              lutData;  /* handed to gmlib as ptr_3dlut_rgb (bitwidth_3dlut is set to 12) */
+    void*                        formattedLutData;
+    unsigned short               inputNormalizationFactor; /* becomes pq_norm when the transfer function is TMG_TF_NormalizedPQ */
+};
+
+enum TMGReturnCode {
+    TMG_RET_OK,
+    TMG_RET_ERROR_DUPLICATE_INIT,
+    TMG_RET_ERROR_INVALID_PARAM,   /* e.g. a transfer function that TranslateTfEnum() cannot map */
+    TMG_RET_ERROR_NOT_INITIALIZED,
+    TMG_RET_ERROR_GMLIB            /* a gm_api_* or csc_api_* call returned non-zero */
+};
diff --git a/src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h b/src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h
new file mode 100755
index 00000000000..031d54219d5
--- /dev/null
+++ b/src/amd/gmlib/ToneMapGenerator/src/inc/AGMGenerator.h
@@ -0,0 +1,39 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#pragma once
+#include "ToneMapTypes.h"
+#include "gm_api_funcs.h"
+
+/* C replacement for the C++ class AGMGenerator. */
+struct AGMGenerator {
+    TMGAlloc           allocFunc;      /* forwarded to gm_ctor() */
+    TMGFree            freeFunc;       /* forwarded to gm_ctor() */
+    void*              memoryContext;  /* forwarded to gm_ctor() */
+    bool               initalized;     /* set by the first AGMGenerator_ApplyToneMap() after gm_ctor() and gm_api_set_def() */
+    struct s_gamut_map agmParams;      /* passed to gm_ctor(), gm_api_gen_map(), gm_api_gen_3dlut() and gm_dtor() */
+    struct s_gm_opts   gamutMapParams; /* options filled in by AGMGenerator_SetAgmOptions() */
+};
+
+enum TMGReturnCode AGMGenerator_ApplyToneMap( /* TMG_RET_OK, or the code of the first step that failed */
+    struct AGMGenerator*                p_agm_generator,
+    const struct ToneMapHdrMetaData*    streamMetaData,   /* source metadata */
+    const struct ToneMapHdrMetaData*    dstMetaData,      /* destination metadata */
+    const enum ToneMapAlgorithm         tmAlgorithm,      /* selects gamut map and hue rotation modes */
+    const struct ToneMappingParameters* tmParams,         /* supplies transfer functions and the LUT target */
+    bool                                updateSrcParams,  /* adds GM_UPDATE_SRC to update_msk */
+    bool                                updateDstParams,  /* adds GM_UPDATE_DST to update_msk */
+    bool                                enableMerge3DLUT); /* stored as en_merge_3dlut */
+
+enum TMGReturnCode AGMGenerator_SetGMAllocator( /* stores the callbacks; always returns TMG_RET_OK */
+    struct AGMGenerator* p_agm_generator,
+    TMGAlloc             allocFunc, /* stored in p_agm_generator->allocFunc */
+    TMGFree              freeFunc,  /* stored in p_agm_generator->freeFunc */
+    void*                memCtx);   /* stored in p_agm_generator->memoryContext */
+
+/* Replaces ~AGMGenerator(): calls gm_dtor() on the generator's agmParams. */
+void AGMGenerator_Exit(struct AGMGenerator* p_agm_generator);
diff --git a/src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h b/src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h
new file mode 100755
index 00000000000..9c0b795c564
--- /dev/null
+++ b/src/amd/gmlib/ToneMapGenerator/src/inc/CSCGenerator.h
@@ -0,0 +1,176 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#pragma once
+#include "csc_api_funcs.h"
+#include "ToneMapTypes.h"
+
+/* Convert a TMG transfer function into the matching cs_gamma_type.
+ *
+ * On success the gamma type is stored in *outTf and true is returned.
+ * TMG_TF_PQ and TMG_TF_NormalizedPQ share EGT_PQ. Any value not listed
+ * below returns false and leaves *outTf unwritten. */
+static bool TranslateTfEnum(
+    enum ToneMapTransferFunction inTf,
+    enum cs_gamma_type*          outTf)
+{
+    enum cs_gamma_type gamma;
+
+    if (inTf == TMG_TF_SRGB)
+        gamma = EGT_sRGB;
+    else if (inTf == TMG_TF_BT709)
+        gamma = EGT_709;
+    else if (inTf == TMG_TF_G24)
+        gamma = EGT_2_4;
+    else if (inTf == TMG_TF_HLG)
+        gamma = EGT_HLG;
+    else if (inTf == TMG_TF_PQ || inTf == TMG_TF_NormalizedPQ)
+        gamma = EGT_PQ;
+    else
+        return false;
+
+    *outTf = gamma;
+    return true;
+}
+
+static void CSCCtor(struct s_csc_map* csc_map) /* thin wrapper: forwards to csc_ctor() */
+{
+    csc_ctor(csc_map);
+}
+
+/* Fill csc_opts for a source -> destination color space conversion.
+ *
+ * srcMetaData  chromaticities and max mastering luminance of the source
+ * inTf         source transfer function
+ * dstMetaData  chromaticities and max mastering luminance of the destination
+ * outTf        destination transfer function
+ * tmParams     supplies lutData, lutDim and inputNormalizationFactor
+ * merge3DLUT   stored as en_merge_3dlut
+ * csc_opts     options structure to fill
+ *
+ * Each chromaticity is divided by 50000.0 on the way in, luminance_limits
+ * is set to [0, maxMasteringLuminance] and bitwidth_3dlut is fixed at 12.
+ * pq_norm is inputNormalizationFactor for TMG_TF_NormalizedPQ and
+ * MAX_LUMINANCE for every other transfer function.
+ *
+ * Returns TMG_RET_ERROR_INVALID_PARAM, before writing anything to csc_opts,
+ * when either transfer function has no gamma mapping; TMG_RET_OK otherwise.
+ */
+static enum TMGReturnCode CSCSetOptions(
+    const struct ToneMapHdrMetaData*    srcMetaData,
+    enum ToneMapTransferFunction        inTf,
+    const struct ToneMapHdrMetaData*    dstMetaData,
+    enum ToneMapTransferFunction        outTf,
+    const struct ToneMappingParameters* tmParams,
+    bool                                merge3DLUT,
+    struct s_csc_api_opts*              csc_opts)
+{
+    enum cs_gamma_type srcGamma;
+    enum cs_gamma_type dstGamma;
+
+    /* outTf is only examined once inTf translated successfully. */
+    if (!TranslateTfEnum(inTf, &srcGamma) ||
+        !TranslateTfEnum(outTf, &dstGamma))
+        return TMG_RET_ERROR_INVALID_PARAM;
+
+    /* Metadata chromaticities in rgbw_xy[] order:
+     * red x/y, green x/y, blue x/y, white point x/y. */
+    const unsigned short srcXy[8] = {
+        srcMetaData->redPrimaryX,   srcMetaData->redPrimaryY,
+        srcMetaData->greenPrimaryX, srcMetaData->greenPrimaryY,
+        srcMetaData->bluePrimaryX,  srcMetaData->bluePrimaryY,
+        srcMetaData->whitePointX,   srcMetaData->whitePointY
+    };
+    const unsigned short dstXy[8] = {
+        dstMetaData->redPrimaryX,   dstMetaData->redPrimaryY,
+        dstMetaData->greenPrimaryX, dstMetaData->greenPrimaryY,
+        dstMetaData->bluePrimaryX,  dstMetaData->bluePrimaryY,
+        dstMetaData->whitePointX,   dstMetaData->whitePointY
+    };
+
+    /* 3D LUT target. */
+    csc_opts->ptr_3dlut_rgb  = tmParams->lutData;
+    csc_opts->num_pnts_3dlut = tmParams->lutDim;
+    csc_opts->bitwidth_3dlut = 12;
+    csc_opts->en_merge_3dlut = merge3DLUT;
+
+    /* Source color space. */
+    csc_opts->cs_opts_src.color_space_type = ECST_CUSTOM;
+    for (int i = 0; i < 8; i++) {
+        csc_opts->cs_opts_src.rgbw_xy[i] = srcXy[i] / 50000.0;
+    }
+
+    csc_opts->cs_opts_src.gamma_type          = srcGamma;
+    csc_opts->cs_opts_src.luminance_limits[0] = 0.0;
+    csc_opts->cs_opts_src.luminance_limits[1] =
+        (double)srcMetaData->maxMasteringLuminance;
+
+    /* Only normalized PQ uses the caller's normalization factor. */
+    csc_opts->cs_opts_src.pq_norm = (inTf == TMG_TF_NormalizedPQ)
+        ? (double)tmParams->inputNormalizationFactor
+        : MAX_LUMINANCE;
+
+    /* Destination color space. */
+    csc_opts->cs_opts_dst.color_space_type = ECST_CUSTOM;
+    for (int i = 0; i < 8; i++) {
+        csc_opts->cs_opts_dst.rgbw_xy[i] = dstXy[i] / 50000.0;
+    }
+
+    csc_opts->cs_opts_dst.gamma_type          = dstGamma;
+    csc_opts->cs_opts_dst.luminance_limits[0] = 0.0;
+    csc_opts->cs_opts_dst.luminance_limits[1] =
+        (double)dstMetaData->maxMasteringLuminance;
+
+    /* Same rule for the destination side. */
+    csc_opts->cs_opts_dst.pq_norm = (outTf == TMG_TF_NormalizedPQ)
+        ? (double)tmParams->inputNormalizationFactor
+        : MAX_LUMINANCE;
+
+    return TMG_RET_OK;
+}
+
+static void CSCSetDefault(struct s_csc_api_opts* csc_opts) /* thin wrapper: forwards to csc_api_set_def() */
+{
+    csc_api_set_def(csc_opts);
+}
+
+static void CSCGenerateMap(struct s_csc_api_opts* csc_opts, struct s_csc_map* csc_map) /* thin wrapper: forwards to csc_api_gen_map() */
+{
+    csc_api_gen_map(csc_opts, csc_map);
+}
+
+static enum TMGReturnCode CSCGenerate3DLUT(struct s_csc_api_opts* csc_opts, struct s_csc_map* csc_map)
+{
+    /* Any non-zero status from csc_api_gen_3dlut() is reported as TMG_RET_ERROR_GMLIB. */
+    const int status = csc_api_gen_3dlut(csc_opts, csc_map);
+    return (status != 0) ? TMG_RET_ERROR_GMLIB : TMG_RET_OK;
+}
+
+/* Run the CSC path: options -> map -> 3D LUT. A CSCSetOptions() failure is
+ * returned as-is so no LUT is generated from default-only options. */
+static enum TMGReturnCode CSCGenerator_ApplyCSC(
+    const struct ToneMapHdrMetaData* srcMetaData,
+    enum ToneMapTransferFunction     inTf,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapTransferFunction     outTf,
+    struct ToneMappingParameters*    tmParams,
+    bool                             enable3DLUTMerge)
+{
+    struct s_csc_map      csc_map;
+    struct s_csc_api_opts csc_opts;
+    enum TMGReturnCode    ret;
+
+    CSCCtor(&csc_map);
+    CSCSetDefault(&csc_opts);
+    ret = CSCSetOptions(srcMetaData, inTf, dstMetaData, outTf,
+                        tmParams, enable3DLUTMerge, &csc_opts);
+    if (ret != TMG_RET_OK)
+        return ret;
+
+    CSCGenerateMap(&csc_opts, &csc_map);
+    return CSCGenerate3DLUT(&csc_opts, &csc_map);
+}
diff --git a/src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c b/src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c
new file mode 100755
index 00000000000..4a002163794
--- /dev/null
+++ b/src/amd/gmlib/ToneMapGenerator/src/src/AGMGenerator.c
@@ -0,0 +1,261 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "AGMGenerator.h"
+
+// Forward declarations of helpers defined later in this file; AGMGenerator.h does not declare them.
+void AGMGenerator_GMCtor(struct AGMGenerator* p_agm_generator);
+void AGMGenerator_GMSetDefault(struct AGMGenerator* p_agm_generator);
+enum TMGReturnCode AGMGenerator_SetAgmOptions(
+    struct AGMGenerator*                p_agm_generator,
+    const struct ToneMapHdrMetaData*    srcMetaData,
+    const struct ToneMapHdrMetaData*    dstMetaData,
+    const enum ToneMapAlgorithm         tmAlgorithm,
+    const struct ToneMappingParameters* tmParams,
+    bool                                updateSrcParams,
+    bool                                updateDstParams,
+    bool                                enableMerge3DLUT);
+enum TMGReturnCode AGMGenerator_GMGenerateMap(struct AGMGenerator* p_agm_generator);
+enum TMGReturnCode AGMGenerator_GMGenerate3DLUT(struct AGMGenerator* p_agm_generator);
+
+/* Map inTf to its cs_gamma_type; *outTf is written only when true is
+ * returned. Values without a case (default) are reported as false. */
+static bool TranslateTfEnum(
+    enum ToneMapTransferFunction inTf,
+    enum cs_gamma_type* outTf)
+{
+    switch (inTf) {
+    case TMG_TF_SRGB:
+        *outTf = EGT_sRGB;
+        return true;
+    case TMG_TF_BT709:
+        *outTf = EGT_709;
+        return true;
+    case TMG_TF_G24:
+        *outTf = EGT_2_4;
+        return true;
+    case TMG_TF_HLG:
+        *outTf = EGT_HLG;
+        return true;
+    case TMG_TF_PQ:
+    case TMG_TF_NormalizedPQ:
+        *outTf = EGT_PQ;
+        return true;
+    default:
+        return false;
+    }
+}
+
+enum TMGReturnCode AGMGenerator_SetGMAllocator( /* records the callbacks; always returns TMG_RET_OK */
+    struct AGMGenerator* p_agm_generator,
+    TMGAlloc             allocFunc,
+    TMGFree              freeFunc,
+    void*                memCtx)
+{
+    p_agm_generator->allocFunc     = allocFunc; /* in this file these three fields are only read by AGMGenerator_GMCtor() */
+    p_agm_generator->freeFunc      = freeFunc;
+    p_agm_generator->memoryContext = memCtx;
+    return TMG_RET_OK;
+}
+
+/* Produce the tone-mapping 3D LUT through gmlib.
+ *
+ * The first call (initalized == false) runs gm_ctor() and gm_api_set_def().
+ * Every call then refreshes the options from the arguments, generates the
+ * map and finally the 3D LUT. The first failing step's code is returned
+ * and the remaining steps are skipped.
+ */
+enum TMGReturnCode AGMGenerator_ApplyToneMap(
+    struct AGMGenerator*                p_agm_generator,
+    const struct ToneMapHdrMetaData*    streamMetaData,
+    const struct ToneMapHdrMetaData*    dstMetaData,
+    const enum ToneMapAlgorithm         tmAlgorithm,
+    const struct ToneMappingParameters* tmParams,
+    bool                                updateSrcParams,
+    bool                                updateDstParams,
+    bool                                enableMerge3DLUT)
+{
+    enum TMGReturnCode status;
+
+    /* One-time lazy construction. */
+    if (!p_agm_generator->initalized) {
+        AGMGenerator_GMCtor(p_agm_generator);
+        AGMGenerator_GMSetDefault(p_agm_generator);
+        p_agm_generator->initalized = true;
+    }
+
+    status = AGMGenerator_SetAgmOptions(
+        p_agm_generator, streamMetaData, dstMetaData, tmAlgorithm,
+        tmParams, updateSrcParams, updateDstParams, enableMerge3DLUT);
+    if (status != TMG_RET_OK)
+        return status;
+
+    status = AGMGenerator_GMGenerateMap(p_agm_generator);
+    if (status != TMG_RET_OK)
+        return status;
+
+    return AGMGenerator_GMGenerate3DLUT(p_agm_generator);
+}
+
+/* Refresh the gmlib options of p_agm_generator from the caller's inputs.
+ *
+ * srcMetaData, dstMetaData  chromaticities and max mastering luminance of
+ *                           the source and the destination
+ * tmAlgorithm               TMG_A_AGM selects EGMM_TM_CHTO with EHRM_HR,
+ *                           anything else EGMM_TM with EHRM_NONE
+ * tmParams                  supplies shaperTf, lutOutTf, lutData, lutDim
+ *                           and inputNormalizationFactor
+ * updateSrcParams/DstParams request GM_UPDATE_SRC / GM_UPDATE_DST
+ * enableMerge3DLUT          stored as en_merge_3dlut
+ *
+ * Returns TMG_RET_ERROR_INVALID_PARAM, without touching the options, when
+ * shaperTf or lutOutTf has no gamma mapping; TMG_RET_OK otherwise.
+ */
+enum TMGReturnCode AGMGenerator_SetAgmOptions(
+    struct AGMGenerator*                p_agm_generator,
+    const struct ToneMapHdrMetaData*    srcMetaData,
+    const struct ToneMapHdrMetaData*    dstMetaData,
+    const enum ToneMapAlgorithm         tmAlgorithm,
+    const struct ToneMappingParameters* tmParams,
+    bool                                updateSrcParams,
+    bool                                updateDstParams,
+    bool                                enableMerge3DLUT)
+{
+    struct s_gm_opts*  opts = &p_agm_generator->gamutMapParams;
+    enum cs_gamma_type srcGamma;
+    enum cs_gamma_type dstGamma;
+
+    /* lutOutTf is only examined once shaperTf translated successfully. */
+    if (!TranslateTfEnum(tmParams->shaperTf, &srcGamma) ||
+        !TranslateTfEnum(tmParams->lutOutTf, &dstGamma))
+        return TMG_RET_ERROR_INVALID_PARAM;
+
+    /* Metadata chromaticities in rgbw_xy[] order:
+     * red x/y, green x/y, blue x/y, white point x/y. */
+    const unsigned short srcXy[8] = {
+        srcMetaData->redPrimaryX,   srcMetaData->redPrimaryY,
+        srcMetaData->greenPrimaryX, srcMetaData->greenPrimaryY,
+        srcMetaData->bluePrimaryX,  srcMetaData->bluePrimaryY,
+        srcMetaData->whitePointX,   srcMetaData->whitePointY
+    };
+    const unsigned short dstXy[8] = {
+        dstMetaData->redPrimaryX,   dstMetaData->redPrimaryY,
+        dstMetaData->greenPrimaryX, dstMetaData->greenPrimaryY,
+        dstMetaData->bluePrimaryX,  dstMetaData->bluePrimaryY,
+        dstMetaData->whitePointX,   dstMetaData->whitePointY
+    };
+
+    /* Algorithm selection. */
+    if (tmAlgorithm == TMG_A_AGM) {
+        opts->gamut_map_mode = EGMM_TM_CHTO;
+        opts->hue_rot_mode   = EHRM_HR;
+    }
+    else {
+        opts->gamut_map_mode = EGMM_TM;
+        opts->hue_rot_mode   = EHRM_NONE;
+    }
+
+    /* Build the update mask from the two update flags. */
+    opts->update_msk = 0;
+    if (updateSrcParams)
+        opts->update_msk |= GM_UPDATE_SRC;
+    if (updateDstParams)
+        opts->update_msk |= GM_UPDATE_DST;
+
+    /* 3D LUT target and fixed generation settings. */
+    opts->ptr_3dlut_rgb     = tmParams->lutData;
+    opts->num_pnts_3dlut    = tmParams->lutDim;
+    opts->bitwidth_3dlut    = 12;
+    opts->en_merge_3dlut    = enableMerge3DLUT;
+    opts->mode              = GM_PQTAB_GBD;
+    opts->en_tm_scale_color = 1;
+    opts->num_hue_pnts      = GM_NUM_HUE;
+    opts->num_edge_pnts     = GM_NUM_EDGE;
+    opts->num_int_pnts      = GM_NUM_INT;
+    opts->org2_perc_c       = GM_ORG2_PERC;
+    opts->step_samp         = 0.0005; // GM_STEP_SAMP = 0.0001;
+    opts->show_pix_mode     = ESPM_NONE;
+
+    /* Per-primary factors are copied from gm_vec_org13_factor_def. */
+    for (int i = 0; i < GM_NUM_PRIM; i++) {
+        opts->vec_org1_factor[i] = gm_vec_org13_factor_def[i][0];
+        opts->vec_org3_factor[i] = gm_vec_org13_factor_def[i][1];
+    }
+
+    /* Source color space. */
+    opts->cs_opts_src.color_space_type = ECST_CUSTOM;
+    for (int i = 0; i < 8; i++) {
+        opts->cs_opts_src.rgbw_xy[i] = srcXy[i] / 50000.0;
+    }
+
+    opts->cs_opts_src.gamma_type          = srcGamma;
+    opts->cs_opts_src.luminance_limits[0] = 0;
+    opts->cs_opts_src.luminance_limits[1] =
+        (double)srcMetaData->maxMasteringLuminance;
+
+    opts->cs_opts_src.pq_norm = (tmParams->shaperTf == TMG_TF_NormalizedPQ)
+        ? (double)tmParams->inputNormalizationFactor
+        : MAX_LUMINANCE;
+
+    /* Destination color space. */
+    opts->cs_opts_dst.color_space_type = ECST_CUSTOM;
+    for (int i = 0; i < 8; i++) {
+        opts->cs_opts_dst.rgbw_xy[i] = dstXy[i] / 50000.0;
+    }
+
+    opts->cs_opts_dst.gamma_type          = dstGamma;
+    opts->cs_opts_dst.mode                = 0;
+    opts->cs_opts_dst.luminance_limits[0] = 0;
+    opts->cs_opts_dst.luminance_limits[1] =
+        (double)dstMetaData->maxMasteringLuminance;
+
+    opts->cs_opts_dst.pq_norm = (tmParams->lutOutTf == TMG_TF_NormalizedPQ)
+        ? (double)tmParams->inputNormalizationFactor
+        : MAX_LUMINANCE;
+
+    /* Correct the luminance bounds if necessary: widen the source range so
+     * it covers the destination range, and flag the source for update. */
+    if (opts->cs_opts_src.luminance_limits[0] > opts->cs_opts_dst.luminance_limits[0]) {
+        opts->cs_opts_src.luminance_limits[0] = opts->cs_opts_dst.luminance_limits[0];
+        opts->update_msk |= GM_UPDATE_SRC;
+    }
+    if (opts->cs_opts_src.luminance_limits[1] < opts->cs_opts_dst.luminance_limits[1]) {
+        opts->cs_opts_src.luminance_limits[1] = opts->cs_opts_dst.luminance_limits[1];
+        opts->update_msk |= GM_UPDATE_SRC;
+    }
+
+    return TMG_RET_OK;
+}
+
+void AGMGenerator_GMSetDefault(struct AGMGenerator* p_agm_generator) /* thin wrapper: gm_api_set_def() on gamutMapParams */
+{
+    gm_api_set_def(&p_agm_generator->gamutMapParams);
+}
+
+enum TMGReturnCode AGMGenerator_GMGenerateMap(struct AGMGenerator* p_agm_generator)
+{
+    /* Any non-zero status from gm_api_gen_map() is reported as TMG_RET_ERROR_GMLIB. */
+    const int status = gm_api_gen_map(&p_agm_generator->gamutMapParams, &p_agm_generator->agmParams);
+    return (status != 0) ? TMG_RET_ERROR_GMLIB : TMG_RET_OK;
+}
+
+enum TMGReturnCode AGMGenerator_GMGenerate3DLUT(struct AGMGenerator* p_agm_generator)
+{
+    /* Any non-zero status from gm_api_gen_3dlut() is reported as TMG_RET_ERROR_GMLIB. */
+    const int status = gm_api_gen_3dlut(&p_agm_generator->gamutMapParams, &p_agm_generator->agmParams);
+    return (status != 0) ? TMG_RET_ERROR_GMLIB : TMG_RET_OK;
+}
+
+void AGMGenerator_GMCtor(struct AGMGenerator* p_agm_generator) /* gm_ctor() on agmParams with the stored allocator callbacks and context */
+{
+    gm_ctor(&p_agm_generator->agmParams, p_agm_generator->allocFunc, p_agm_generator->freeFunc, p_agm_generator->memoryContext);
+}
+
+void AGMGenerator_Exit(struct AGMGenerator* p_agm_generator)
+{   /* Replaces ~AGMGenerator(): destroy agmParams only if ApplyToneMap ran gm_ctor() on it. */
+    if (p_agm_generator->initalized) gm_dtor(&p_agm_generator->agmParams);
+}
\ No newline at end of file
diff --git a/src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c b/src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c
new file mode 100755
index 00000000000..8677f5d3354
--- /dev/null
+++ b/src/amd/gmlib/ToneMapGenerator/src/src/ToneMapGenerator.c
@@ -0,0 +1,354 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "ToneMapGenerator.h"
+#include "AGMGenerator.h"
+#include "CSCGenerator.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Container defaults taken from ColorPrimaryTable.h. Chromaticities are stored as value * 50000; all four tables share one value order. */
+struct ToneMapHdrMetaData BT2020Container = {
+    (unsigned short)(0.708 * 50000),  (unsigned short)(0.292 * 50000),  /* BT.2020 red   (x, y) */
+    (unsigned short)(0.17 * 50000),   (unsigned short)(0.797 * 50000),  /* BT.2020 green (x, y) */
+    (unsigned short)(0.131 * 50000),  (unsigned short)(0.046 * 50000),  /* BT.2020 blue  (x, y) */
+    (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000), /* white point   (x, y) */
+    (unsigned int)(10000 * 10000),    (unsigned int)(0.05 * 10000),     /* presumably max / min mastering luminance - TODO confirm field order */
+    (unsigned short)10000,            /* presumably max content light level - TODO confirm */
+    (unsigned short)10000             /* presumably max frame-average light level - TODO confirm */
+};
+
+struct ToneMapHdrMetaData DCIP3Container = {
+    (unsigned short)(0.68 * 50000),   (unsigned short)(0.32 * 50000),
+    (unsigned short)(0.265 * 50000),  (unsigned short)(0.69 * 50000),
+    (unsigned short)(0.15 * 50000),   (unsigned short)(0.06 * 50000),
+    (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000),
+    (unsigned int)(10000 * 10000),    (unsigned int)(0.05 * 10000),
+    (unsigned short)10000,
+    (unsigned short)10000
+};
+
+struct ToneMapHdrMetaData BT709Container = {
+    (unsigned short)(0.64 * 50000),   (unsigned short)(0.33 * 50000),
+    (unsigned short)(0.30 * 50000),   (unsigned short)(0.60 * 50000),
+    (unsigned short)(0.15 * 50000),   (unsigned short)(0.06 * 50000),
+    (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000),
+    (unsigned int)(10000 * 10000),    (unsigned int)(0.05 * 10000),
+    (unsigned short)10000,
+    (unsigned short)10000
+};
+
+struct ToneMapHdrMetaData BT601Container = {
+    (unsigned short)(0.63 * 50000),   (unsigned short)(0.34 * 50000),
+    (unsigned short)(0.31 * 50000),   (unsigned short)(0.595 * 50000),
+    (unsigned short)(0.155 * 50000),  (unsigned short)(0.07 * 50000),
+    (unsigned short)(0.3127 * 50000), (unsigned short)(0.3290 * 50000),
+    (unsigned int)(10000 * 10000),    (unsigned int)(0.05 * 10000),
+    (unsigned short)10000,
+    (unsigned short)10000
+};
+
+
+/* Forward declarations of the helper functions defined further down in this file. */
+enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorIn(void);
+enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorOut(
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries   outputContainerPrimaries);
+enum ToneMapTransferFunction ToneMapGenerator_GetShaperTf(
+    enum ToneMapTransferFunction inputContainerGamma);
+enum ToneMapTransferFunction ToneMapGenerator_GetLutOutTf(
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries   outputContainerPrimaries);
+unsigned short ToneMapGenerator_GetInputNormFactor(
+    const struct ToneMapHdrMetaData* streamMetaData);
+bool ToneMapGenerator_CacheSrcTmParams(
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    enum ToneMapTransferFunction inputContainerGamma);
+bool ToneMapGenerator_CacheDstTmParams(
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries   outputContainerPrimaries);
+enum TMGReturnCode ToneMapGenerator_GenerateLutData(
+    struct ToneMapGenerator*         p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapAlgorithm            tmAlgorithm,
+    bool                             updateSrcParams,
+    bool                             updateDstParams,
+    struct ToneMappingParameters*    tmParams);
+struct ToneMapHdrMetaData ToneMapGenerator_GetColorContainerData(
+    enum ToneMapColorPrimaries containerColor);
+bool ToneMapGenerator_ContentEqualsContainer(
+    const struct ToneMapHdrMetaData* contentMetaData,
+    const struct ToneMapHdrMetaData* containerPrimaries);
+
+
+enum TMGReturnCode ToneMapGenerator_GenerateToneMappingParameters(
+    struct ToneMapGenerator*         p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapTransferFunction     inputContainerGamma,
+    enum ToneMapTransferFunction     outputContainerGamma,
+    enum ToneMapColorPrimaries       outputContainerPrimaries,
+    unsigned short                   lutDim,
+    struct ToneMappingParameters*    tmParams)
+{
+    /*
+     * Fills the descriptive fields of tmParams (transfer functions, primaries,
+     * LUT dimension, input normalization factor), refreshes the cached source
+     * and destination parameters, then returns whatever
+     * ToneMapGenerator_GenerateLutData() reports. When memAllocSet is false
+     * it returns TMG_RET_ERROR_NOT_INITIALIZED without touching tmParams.
+     */
+    bool srcChanged;
+    bool dstChanged;
+
+    if (!p_tmGenerator->memAllocSet)
+        return TMG_RET_ERROR_NOT_INITIALIZED;
+
+    tmParams->formattedLutData         = NULL;
+    tmParams->lutDim                   = lutDim;
+    tmParams->shaperTf                 = ToneMapGenerator_GetShaperTf(inputContainerGamma);
+    tmParams->lutColorIn               = ToneMapGenerator_GetLutColorIn();
+    tmParams->lutColorOut              = ToneMapGenerator_GetLutColorOut(outputContainerGamma, outputContainerPrimaries);
+    tmParams->lutOutTf                 = ToneMapGenerator_GetLutOutTf(outputContainerGamma, outputContainerPrimaries);
+    tmParams->inputNormalizationFactor = ToneMapGenerator_GetInputNormFactor(streamMetaData);
+
+    srcChanged = ToneMapGenerator_CacheSrcTmParams(p_tmGenerator, streamMetaData, inputContainerGamma);
+    dstChanged = ToneMapGenerator_CacheDstTmParams(p_tmGenerator, dstMetaData, outputContainerGamma, outputContainerPrimaries);
+
+    return ToneMapGenerator_GenerateLutData(p_tmGenerator, streamMetaData, dstMetaData, p_tmGenerator->tmAlgo, srcChanged, dstChanged, tmParams);
+}
+
+enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorIn(void)
+{   /* Takes no input and always reports BT.2020 as the LUT input primaries. */
+    return TMG_CP_BT2020;
+}
+
+enum ToneMapColorPrimaries ToneMapGenerator_GetLutColorOut(
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries   outputContainerPrimaries)
+{
+    /*
+     * Chooses the primaries of the LUT output: a linear output container is
+     * always given BT.2020; every other transfer function keeps the primaries
+     * requested by the caller.
+     */
+    const bool linearOutput = (outputContainerGamma == TMG_TF_Linear);
+
+    return linearOutput ? TMG_CP_BT2020 : outputContainerPrimaries;
+}
+
+enum ToneMapTransferFunction ToneMapGenerator_GetShaperTf(
+    enum ToneMapTransferFunction inputContainerGamma)
+{
+    /*
+     * Maps the input container transfer function to the transfer function
+     * used by the shaper:
+     *   - PQ and linear input both select TMG_TF_NormalizedPQ;
+     *   - any other transfer function is returned unchanged.
+     * The result is stored in tmParams->shaperTf by the caller.
+     */
+    if (inputContainerGamma == TMG_TF_PQ ||
+        inputContainerGamma == TMG_TF_Linear) {
+        return TMG_TF_NormalizedPQ;
+    }
+
+    return inputContainerGamma;
+}
+
+enum ToneMapTransferFunction ToneMapGenerator_GetLutOutTf(
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries   outputContainerPrimaries)
+{
+    /*
+     * Transfer function of the LUT output: linear and PQ output containers
+     * both get PQ, anything else keeps the container's own transfer function.
+     * outputContainerPrimaries is accepted but not consulted.
+     */
+    if (outputContainerGamma != TMG_TF_Linear && outputContainerGamma != TMG_TF_PQ)
+        return outputContainerGamma;
+
+    return TMG_TF_PQ;
+}
+
+struct ToneMapHdrMetaData ToneMapGenerator_GetColorContainerData(enum ToneMapColorPrimaries containerColor) {
+    /*
+     * Returns, by value, a copy of the container table that matches the
+     * requested primaries (BT.601, BT.709, DCI-P3 or BT.2020).
+     * Any value without a table of its own falls back to the BT.2020
+     * table, so the caller always receives a fully populated structure.
+     * The luminance fields of the copy are the table defaults.
+     */
+    const struct ToneMapHdrMetaData* container;
+
+    if (containerColor == TMG_CP_BT601)
+        container = &BT601Container;
+    else if (containerColor == TMG_CP_BT709)
+        container = &BT709Container;
+    else if (containerColor == TMG_CP_DCIP3)
+        container = &DCIP3Container;
+    else
+        container = &BT2020Container;
+
+    return *container;
+}
+
+unsigned short ToneMapGenerator_GetInputNormFactor(const struct ToneMapHdrMetaData* streamMetaData) {
+    /*
+     * Returns the larger of the stream's maxMasteringLuminance and
+     * INPUT_NORMALIZATION_FACTOR, converted to unsigned short.
+     * NOTE(review): if maxMasteringLuminance is wider than unsigned short the
+     * conversion truncates - confirm the field's type and units.
+     */
+    if (streamMetaData->maxMasteringLuminance >= INPUT_NORMALIZATION_FACTOR)
+        return streamMetaData->maxMasteringLuminance;
+    return INPUT_NORMALIZATION_FACTOR;
+}
+
+bool ToneMapGenerator_ContentEqualsContainer(
+    const struct ToneMapHdrMetaData* contentMetaData,
+    const struct ToneMapHdrMetaData* containerPrimaries)
+{
+    /* True when each R/G/B primary coordinate differs from the container's by less than 2; white point and luminance are not compared. */
+    if (abs(contentMetaData->redPrimaryX   - containerPrimaries->redPrimaryX)   < 2 &&
+        abs(contentMetaData->redPrimaryY   - containerPrimaries->redPrimaryY)   < 2 &&
+        abs(contentMetaData->greenPrimaryX - containerPrimaries->greenPrimaryX) < 2 &&
+        abs(contentMetaData->greenPrimaryY - containerPrimaries->greenPrimaryY) < 2 &&
+        abs(contentMetaData->bluePrimaryX  - containerPrimaries->bluePrimaryX)  < 2 &&
+        abs(contentMetaData->bluePrimaryY  - containerPrimaries->bluePrimaryY)  < 2)
+        return true;
+    else
+        return false;
+}
+
+/*
+    Tone map generation consists of three steps:
+    1. Container to content color space conversion.
+    2. Tone mapping and gamut mapping operation.
+    3. Content to output container color space conversion.
+
+    These operations are cascaded one after the other. enable3DLUTMerge tells each stage
+    whether to start from scratch or to use the previous stage's output as its own input.
+
+    The terminology "Content Color Space / Container Color Space" is used to distinguish
+    between the color volume of the content and the color volume of the container.
+    For example, the content color volume might be DCIP3 and the container might be BT2020.
+    The CSC step changes the representation of the content to align with its color volume.
+*/
+enum TMGReturnCode ToneMapGenerator_GenerateLutData(
+    struct ToneMapGenerator*         p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapAlgorithm            tmAlgorithm,
+    bool                             updateSrcParams,
+    bool                             updateDstParams,
+    struct ToneMappingParameters*    tmParams)
+{
+    /* NOTE(review): whatever the CSC / tone-map stages return is discarded; this function always reports TMG_RET_OK. */
+    bool enable3DLUTMerge           = false;
+    struct ToneMapHdrMetaData lutContainer = ToneMapGenerator_GetColorContainerData(tmParams->lutColorIn);
+
+    if (!ToneMapGenerator_ContentEqualsContainer(streamMetaData, &lutContainer)) {
+        lutContainer.maxMasteringLuminance = streamMetaData->maxMasteringLuminance;
+        lutContainer.minMasteringLuminance = streamMetaData->minMasteringLuminance;
+
+        CSCGenerator_ApplyCSC(
+            &lutContainer,
+            tmParams->shaperTf,
+            streamMetaData,
+            tmParams->shaperTf,
+            tmParams,
+            enable3DLUTMerge);
+
+        enable3DLUTMerge = true;
+    }
+
+    AGMGenerator_ApplyToneMap(
+        &p_tmGenerator->agmGenerator,
+        streamMetaData,
+        dstMetaData,
+        tmAlgorithm,
+        tmParams,
+        updateSrcParams,
+        updateDstParams,
+        enable3DLUTMerge);
+    /* From here on a previous stage has always run, so the output CSC merges into its result. */
+    enable3DLUTMerge = true;
+
+    lutContainer = ToneMapGenerator_GetColorContainerData(tmParams->lutColorOut);
+    if (!ToneMapGenerator_ContentEqualsContainer(dstMetaData, &lutContainer)) {
+        lutContainer.maxMasteringLuminance = dstMetaData->maxMasteringLuminance;
+        lutContainer.minMasteringLuminance = dstMetaData->minMasteringLuminance;
+
+        CSCGenerator_ApplyCSC(
+            dstMetaData,
+            tmParams->lutOutTf,
+            &lutContainer,
+            tmParams->lutOutTf,
+            tmParams,
+            enable3DLUTMerge
+            );
+    }
+
+    return TMG_RET_OK;
+}
+
+bool ToneMapGenerator_CacheSrcTmParams(
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* streamMetaData,
+    enum ToneMapTransferFunction inputContainerGamma)
+{
+    /* Returns false when the inputs match the cached copy; otherwise refreshes the cache and returns true. */
+    if (memcmp(streamMetaData, &p_tmGenerator->cachedSrcTmParams.streamMetaData, sizeof(struct ToneMapHdrMetaData)) == 0 &&
+        inputContainerGamma == p_tmGenerator->cachedSrcTmParams.inputContainerGamma)
+        return false;
+
+    p_tmGenerator->cachedSrcTmParams.inputContainerGamma = inputContainerGamma;
+    memcpy(&p_tmGenerator->cachedSrcTmParams.streamMetaData, streamMetaData, sizeof(struct ToneMapHdrMetaData));
+
+    return true;
+}
+
+bool ToneMapGenerator_CacheDstTmParams(
+    struct ToneMapGenerator* p_tmGenerator,
+    const struct ToneMapHdrMetaData* dstMetaData,
+    enum ToneMapTransferFunction outputContainerGamma,
+    enum ToneMapColorPrimaries   outputContainerPrimaries)
+{
+    bool updateDstParams = memcmp(dstMetaData, &p_tmGenerator->cachedDstTmParams.dstMetaData, sizeof(struct ToneMapHdrMetaData)) ||
+        outputContainerGamma != p_tmGenerator->cachedDstTmParams.outputContainerGamma ||
+        outputContainerPrimaries != p_tmGenerator->cachedDstTmParams.outputContainerPrimaries;
+
+    if (updateDstParams){
+        /* Something differs from the cached copy: store the new values so the next call compares against them. */
+        memcpy(&p_tmGenerator->cachedDstTmParams.dstMetaData, dstMetaData, sizeof(struct ToneMapHdrMetaData));
+        p_tmGenerator->cachedDstTmParams.outputContainerGamma     = outputContainerGamma;
+        p_tmGenerator->cachedDstTmParams.outputContainerPrimaries = outputContainerPrimaries;
+    }
+
+    return updateDstParams;
+}
+
+enum TMGReturnCode ToneMapGenerator_SetInternalAllocators(
+    struct ToneMapGenerator* p_tmGenerator,
+    TMGAlloc                 allocFunc,
+    TMGFree                  freeFunc,
+    void*                    memCtx)
+{
+    /*
+     * Forwards the allocator callbacks and context to the embedded AGM
+     * generator and returns that call's result. NOTE(review): memAllocSet
+     * becomes true whatever the call returned - confirm that is intended.
+     */
+    const enum TMGReturnCode status =
+        AGMGenerator_SetGMAllocator(&p_tmGenerator->agmGenerator, allocFunc, freeFunc, memCtx);
+    p_tmGenerator->memAllocSet = true;
+    return status;
+}
\ No newline at end of file
diff --git a/src/amd/gmlib/gm/cs_funcs.c b/src/amd/gmlib/gm/cs_funcs.c
new file mode 100755
index 00000000000..4ae443a162a
--- /dev/null
+++ b/src/amd/gmlib/gm/cs_funcs.c
@@ -0,0 +1,1418 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : cs_funcs.c
+ * Purpose    : Color Space functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : September 20, 2023
+ * Version    : 1.4
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "cs_funcs.h"
+
+static const MATFLOAT cs_vec_gamma[EGT_CUSTOM][4] = {    /* one row of four parameters per built-in gamma type, indexed by enum cs_gamma_type */
+    /* c1        c2              c3          c4 */
+    {1.0000,    1.00,            0.00,       0.000},        /* linear                */
+    {1.0990,    0.45,            4.50,       0.018},        /* 709 (SD/HD)           */
+    {1.0000,    1.0 / 2.1992,    0.0,        0.0},          /* Adobe RGB 1998        */
+    {1.0000,    1.0 / 2.6,       0.0,        0.0},          /* DCI-P3 (SMPTE-231-2)  */
+    {1.0000,    1.0 / 1.8,       0.0,        0.0},          /* Apple Trinitron       */
+    {1.0550,    1.0 / 2.4,       12.92,      0.0031308},    /* sRGB                  */
+    {0.0000,    0.0,             0.0,        0.0},          /* PQ                    */
+    {0.5000,    0.0,             0.0,        0.0},          /* HLG                   */
+    {1.0000,    1.0 / 2.2,       0.0,        0.0},          /* Gamma 2.2             */
+    {1.0000,    1.0 / 2.4,       0.0,        0.0}           /* Gamma 2.4             */
+};
+
+static const MATFLOAT cs_vec_color_space[ECST_CUSTOM][8] = {    /* one row per built-in color space, indexed by enum cs_color_space_type */
+    /* Red (x, y), Green (x,y), Blue (x,y), White (x,y) */
+    {0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.312710, 0.329020},    /* ITU_R BT.709-5/sRGB (HDTV) */
+    {0.6300, 0.3400, 0.3100, 0.5950, 0.1550, 0.0700, 0.312710, 0.329020},    /* SMPTE RP 145 (SDTV)        */
+    {0.6400, 0.3300, 0.2100, 0.7100, 0.1500, 0.0600, 0.312710, 0.329020},    /* Adobe RGB (1998)           */
+    {0.6800, 0.3200, 0.2650, 0.6900, 0.1500, 0.0600, 0.312710, 0.329020},    /* DCI P3 (SMPTE-231-2) P3D65 */
+/*  {0.6800, 0.3200, 0.2650, 0.6900, 0.1500, 0.0600, 0.314000, 0.351000},    // DCI P3 (SMPTE-231-2) P3D60 */
+/*  {0.6800, 0.3200, 0.2650, 0.6900, 0.1500, 0.0600, 0.314000, 0.351000},    // DCI P3 (SMPTE-231-2) P3DCI */
+    {0.6250, 0.3400, 0.2800, 0.5950, 0.1550, 0.0700, 0.312710, 0.329020},    /* Apple                      */
+    {0.6400, 0.3300, 0.2900, 0.6000, 0.1500, 0.0600, 0.312710, 0.329020},    /* EBU 3213/ITU (PAL/SECAM)   */
+    {0.6700, 0.3300, 0.2100, 0.7100, 0.1400, 0.0800, 0.310100, 0.316200},    /* NTSC 1953                  */
+    {0.7350, 0.2650, 0.2740, 0.7170, 0.1660, 0.0090, 0.333300, 0.333300},    /* CIE RGB                    */
+    {0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.312710, 0.329020}     /* BT.2020                    */
+};
+
+static MATFLOAT cs_vec_white_point[EWPT_NUM][3] = {    /* one row per enum cs_white_point_type value */
+    /* x, y, z */
+    {1.000000, 1.000000, 1.000000},    /* NONE                                            */
+    {0.447570, 0.407440, 0.144990},    /* A - Tungsten or Incandescent, 2856K             */
+    {0.348400, 0.351600, 0.300000},    /* B - Direct Sunlight at Noon, 4874K (obsolete)   */
+    {0.310060, 0.316150, 0.373790},    /* C - North Sky Daylight, 6774K                   */
+    {0.345670, 0.358500, 0.295830},    /* D50 - Daylight, used for Color Rendering, 5000K */
+    {0.332420, 0.347430, 0.320150},    /* D55 - Daylight, used for Photograph, 5500K      */
+    {0.312710, 0.329020, 0.358270},    /* D65 - New version of North Sky Daylight, 6504K  */
+    {0.299020, 0.314850, 0.386130},    /* D75 - Daylight, 7500K                           */
+    {0.284800, 0.293200, 0.422000},    /* 9300K - High eff. blue phosphor monitors, 9300K */
+    {0.333330, 0.333330, 0.333340},    /* E - Uniform energy illuminant, 5400K            */
+    {0.372070, 0.375120, 0.252810},    /* F2 - Cool White Fluorescent (CWF), 4200K        */
+    {0.312850, 0.329180, 0.357970},    /* F7 - Broad-band Daylight Fluorescent, 6500K     */
+    {0.380540, 0.376910, 0.242540},    /* F11 - Narrow-band White Fluorescent, 4000K      */
+    {0.314000, 0.351000, 0.335000},    /* DCI-P3                                          */
+    {0.277400, 0.283600, 0.438660}     /* 11000K - blue sky, 11000K */
+};
+
+static const MATFLOAT cs_vec_cct_xy[2 * CS_CCT_SIZE] = {    /* interleaved (x, y) pairs; the row-end labels suggest 100 K steps from 1000 K to 20000 K - confirm against CS_CCT_SIZE */
+    0.652750, 0.344462, 0.638755, 0.356498, 0.625043, 0.367454, 0.611630, 0.377232, 0.598520, 0.385788, /* 1000 */
+    0.585716, 0.393121, 0.573228, 0.399264, 0.561066, 0.404274, 0.549243, 0.408225, 0.537776, 0.411202,
+    0.526676, 0.413297, 0.515956, 0.414601, 0.505624, 0.415207, 0.495685, 0.415201, 0.486142, 0.414665, /* 2000 */
+    0.476993, 0.413675, 0.468234, 0.412299, 0.459857, 0.410598, 0.451855, 0.408629, 0.444216, 0.406440,
+    0.436929, 0.404073, 0.429981, 0.401566, 0.423358, 0.398951, 0.417046, 0.396255, 0.411032, 0.393503, /* 3000 */
+    0.405302, 0.390715, 0.399841, 0.387907, 0.394638, 0.385095, 0.389677, 0.382291, 0.384948, 0.379505,
+    0.380438, 0.376746, 0.376135, 0.374019, 0.372029, 0.371332, 0.368108, 0.368687, 0.364364, 0.366090, /* 4000 */
+    0.360786, 0.363543, 0.357366, 0.361048, 0.354095, 0.358605, 0.350965, 0.356217, 0.347969, 0.353884,
+    0.345100, 0.351607, 0.342350, 0.349384, 0.339715, 0.347215, 0.337187, 0.345102, 0.334761, 0.343041, /* 5000 */
+    0.332433, 0.341034, 0.330196, 0.339078, 0.328047, 0.337173, 0.325981, 0.335317, 0.323994, 0.333511,
+    0.322082, 0.331752, 0.320241, 0.330039, 0.318468, 0.328371, 0.316760, 0.326747, 0.315113, 0.325166, /* 6000 */
+    0.313524, 0.323626, 0.311992, 0.322127, 0.310513, 0.320667, 0.309085, 0.319245, 0.307705, 0.317860,
+    0.306372, 0.316511, 0.305083, 0.315196, 0.303837, 0.313915, 0.302631, 0.312667, 0.301463, 0.311450, /* 7000 */
+    0.300333, 0.310264, 0.299238, 0.309108, 0.298178, 0.307981, 0.297149, 0.306881, 0.296153, 0.305809,
+    0.295186, 0.304763, 0.294247, 0.303743, 0.293337, 0.302747, 0.292453, 0.301775, 0.291594, 0.300826, /* 8000 */
+    0.290760, 0.299899, 0.289949, 0.298995, 0.289161, 0.298111, 0.288395, 0.297248, 0.287649, 0.296405,
+    0.286924, 0.295581, 0.286218, 0.294776, 0.285531, 0.293989, 0.284862, 0.293220, 0.284211, 0.292467, /* 9000 */
+    0.283576, 0.291732, 0.282957, 0.291012, 0.282354, 0.290308, 0.281765, 0.289619, 0.281192, 0.288945,
+    0.280632, 0.288286, 0.280086, 0.287640, 0.279553, 0.287007, 0.279033, 0.286388, 0.278525, 0.285782, /* 10000 */
+    0.278029, 0.285188, 0.277544, 0.284606, 0.277071, 0.284036, 0.276608, 0.283477, 0.276156, 0.282930,
+    0.275714, 0.282393, 0.275281, 0.281867, 0.274858, 0.281351, 0.274444, 0.280845, 0.274039, 0.280349, /* 11000 */
+    0.273643, 0.279862, 0.273255, 0.279384, 0.272875, 0.278915, 0.272503, 0.278455, 0.272139, 0.278004,
+    0.271782, 0.277561, 0.271433, 0.277126, 0.271090, 0.276699, 0.270755, 0.276279, 0.270426, 0.275867, /* 12000 */
+    0.270103, 0.275462, 0.269787, 0.275065, 0.269476, 0.274674, 0.269172, 0.274290, 0.268874, 0.273913,
+    0.268581, 0.273542, 0.268293, 0.273178, 0.268011, 0.272820, 0.267734, 0.272467, 0.267462, 0.272121, /* 13000 */
+    0.267195, 0.271780, 0.266933, 0.271445, 0.266676, 0.271116, 0.266423, 0.270791, 0.266174, 0.270472,
+    0.265930, 0.270158, 0.265690, 0.269849, 0.265454, 0.269545, 0.265223, 0.269246, 0.264995, 0.268952, /* 14000 */
+    0.264771, 0.268662, 0.264550, 0.268376, 0.264334, 0.268095, 0.264121, 0.267818, 0.263911, 0.267545,
+    0.263705, 0.267277, 0.263502, 0.267012, 0.263302, 0.266751, 0.263106, 0.266495, 0.262912, 0.266241, /* 15000 */
+    0.262722, 0.265992, 0.262534, 0.265746, 0.262350, 0.265504, 0.262168, 0.265265, 0.261989, 0.265030,
+    0.261813, 0.264798, 0.261640, 0.264569, 0.261469, 0.264343, 0.261300, 0.264121, 0.261134, 0.263901, /* 16000 */
+    0.260971, 0.263685, 0.260809, 0.263471, 0.260651, 0.263261, 0.260494, 0.263053, 0.260340, 0.262848,
+    0.260188, 0.262646, 0.260038, 0.262446, 0.259890, 0.262249, 0.259744, 0.262055, 0.259600, 0.261863, /* 17000 */
+    0.259458, 0.261674, 0.259318, 0.261487, 0.259180, 0.261302, 0.259044, 0.261120, 0.258910, 0.260940,
+    0.258778, 0.260762, 0.258647, 0.260587, 0.258518, 0.260414, 0.258390, 0.260243, 0.258265, 0.260074, /* 18000 */
+    0.258141, 0.259907, 0.258018, 0.259742, 0.257897, 0.259579, 0.257778, 0.259418, 0.257660, 0.259259,
+    0.257544, 0.259102, 0.257429, 0.258947, 0.257315, 0.258793, 0.257203, 0.258642, 0.257093, 0.258492, /* 19000 */
+    0.256983, 0.258344, 0.256875, 0.258197, 0.256768, 0.258052, 0.256663, 0.257909, 0.256559, 0.257768,
+    0.256456, 0.257628    /* 20000 */
+};
+
+const MATFLOAT *cs_get_gamma(enum cs_gamma_type gamma_type)
+{    /* types not below EGT_CUSTOM have no table row and get the EGT_LINEAR row instead */
+    return (gamma_type < EGT_CUSTOM) ? cs_vec_gamma[gamma_type] : cs_vec_gamma[EGT_LINEAR];
+}
+
+const MATFLOAT *cs_get_color_space(enum cs_color_space_type color_space_type)
+{    /* types not below ECST_CUSTOM have no table row and get the ECST_709 row instead */
+    return (color_space_type < ECST_CUSTOM) ? cs_vec_color_space[color_space_type] : cs_vec_color_space[ECST_709];
+}
+
+const MATFLOAT *cs_get_white_point(enum cs_white_point_type white_point_type)
+{    /* types not below EWPT_NUM have no table row and get the EWPT_NONE row instead */
+    return (white_point_type < EWPT_NUM) ? cs_vec_white_point[white_point_type] : cs_vec_white_point[EWPT_NONE];
+}
+
+void cs_set_opts_def(struct s_cs_opts *ptr_cs_opts)
+{    /* defaults: ECST_709 primaries, EGT_709 gamma type, EGT_LINEAR gamma parameters, luminance limits 0..400 */
+    const MATFLOAT *def_rgbw = cs_get_color_space(ECST_709);
+    const MATFLOAT *def_gamma = cs_get_gamma(EGT_LINEAR);
+    for (int idx = 0; idx < 8; idx++)
+        ptr_cs_opts->rgbw_xy[idx] = def_rgbw[idx];
+    for (int idx = 0; idx < 4; idx++)
+        ptr_cs_opts->gamma_parm[idx] = def_gamma[idx];
+    ptr_cs_opts->luminance_limits[0] = 0.0;
+    ptr_cs_opts->luminance_limits[1] = 400.0;
+    ptr_cs_opts->pq_norm = 0.0;
+    ptr_cs_opts->mode = 0;
+    ptr_cs_opts->gamma_type = EGT_709;
+    ptr_cs_opts->color_space_type = ECST_709;
+}
+
+void cs_init(struct s_cs_opts *ptr_cs_opts, struct s_color_space *ptr_color_space)
+{    /* fills *ptr_color_space from the user options, then derives the rest through cs_init_private() */
+    int ni;
+
+    ptr_color_space->color_space_type = ptr_cs_opts->color_space_type;
+    ptr_color_space->gamma_type = ptr_cs_opts->gamma_type;
+    ptr_color_space->mode = ptr_cs_opts->mode;
+    ptr_color_space->pq_norm = (ptr_cs_opts->pq_norm > 0.0) ?
+        cs_gamma_pq(ptr_cs_opts->pq_norm / CS_MAX_LUMINANCE, EGD_LIN_2_NONLIN) : 0.0;
+    /* luminance limits are stored relative to CS_MAX_LUMINANCE; element [2] is the span between them */
+    ptr_color_space->luminance_limits[0] = (MATFLOAT)ptr_cs_opts->luminance_limits[0] / CS_MAX_LUMINANCE;
+    ptr_color_space->luminance_limits[1] = (MATFLOAT)ptr_cs_opts->luminance_limits[1] / CS_MAX_LUMINANCE;
+    ptr_color_space->luminance_limits[2] = ptr_color_space->luminance_limits[1] -
+            ptr_color_space->luminance_limits[0];
+    /* built-in types take their values from the tables; custom types take them from the options */
+    for (ni = 0; ni < 8; ni++)
+        ptr_color_space->rgbw_xy[ni] = (ptr_cs_opts->color_space_type < ECST_CUSTOM) ?
+                cs_get_color_space(ptr_cs_opts->color_space_type)[ni] : ptr_cs_opts->rgbw_xy[ni];
+
+    for (ni = 0; ni < 4; ni++)
+        ptr_color_space->gamma_parm[ni] = (ptr_cs_opts->gamma_type < EGT_CUSTOM) ?
+                cs_get_gamma(ptr_cs_opts->gamma_type)[ni] : (MATFLOAT)ptr_cs_opts->gamma_parm[ni];
+
+    cs_init_private(ptr_color_space);
+}
+
+void cs_init_private(struct s_color_space *ptr_color_space)
+{    /* derives luma limits, white point, conversion matrices, CCT and HLG values from the fields cs_init() filled in */
+    static MATFLOAT mat_xyz2lms[3][3] = {
+        /* ITU-R BT.2390-4, p36. */
+        { 0.3592, 0.6976, -0.0358},
+        {-0.1922, 1.1004,  0.0755},
+        { 0.0070, 0.0749,  0.8434}
+    };
+    static MATFLOAT mat_lms2xyz[3][3] = {
+        /* ITU-R BT.2390-4, p36. */
+        { 2.0701800566956132, -1.3264568761030211,  0.2066160068478551},
+        { 0.3649882500326574,  0.6804673628522352, -0.0454217530758532},
+        {-0.0495955422389321, -0.0494211611867575,  1.1879959417328037}
+    };
+    static MATFLOAT mat_lms2itp[3][3] = {
+        /* ITU-R BT.2020, BT.2390-4, p.36 */
+        {             0.5,               0.5,              0.0},
+        { 6610.0 / 4096.0, -13613.0 / 4096.0,  7003.0 / 4096.0},
+        {17933.0 / 4096.0, -17390.0 / 4096.0,  -543.0 / 4096.0}
+    };
+    static MATFLOAT mat_itp2lms[3][3] = {
+        /* ITU-R BT.2020, BT.2390-4, p.36 */
+        {1.0,  0.00860903703793276,  0.11102962500302596},
+        {1.0, -0.00860903703793276, -0.11102962500302596},
+        {1.0,  0.56003133571067909, -0.32062717498731885}
+    };
+
+    int ni, nj;
+
+    cs_luminance_to_luma_limits(ptr_color_space->luminance_limits, ptr_color_space->luma_limits);
+    mat_3x3_unity(ptr_color_space->mat_chad);
+
+    /* set white point */
+    ptr_color_space->white_xyz[0] = ptr_color_space->rgbw_xy[6];
+    ptr_color_space->white_xyz[1] = ptr_color_space->rgbw_xy[7];
+    ptr_color_space->white_xyz[2] = 1.0;
+    cs_xyy_to_xyz(ptr_color_space->white_xyz, ptr_color_space->white_xyz);
+
+    /* generate RGB to XYZ and back matrixes */
+    cs_genmat_rgb_to_xyz(ptr_color_space->rgbw_xy, ptr_color_space->mat_rgb2xyz);
+    if (ptr_color_space->mode & CS_CHAD_D65) {
+        /* Chromatic Adaptation from Color Space to D65 (BT.2020) */
+        MATFLOAT mat_tmp[3][3];
+
+        cs_genmat_chad(&ptr_color_space->rgbw_xy[6], (MATFLOAT *)cs_get_white_point(EWPT_D65),
+            ptr_color_space->mat_chad);
+        mat_copy3x3(ptr_color_space->mat_rgb2xyz, mat_tmp);
+        mat_mul3x3(ptr_color_space->mat_chad, mat_tmp, ptr_color_space->mat_rgb2xyz);
+    }
+    mat_inv3x3(ptr_color_space->mat_rgb2xyz, ptr_color_space->mat_xyz2rgb);
+    /* the LMS <-> ITP matrices are fixed constants; store a per-instance copy */
+    for (ni = 0; ni < 3; ni++)
+        for (nj = 0; nj < 3; nj++) {
+            ptr_color_space->mat_lms2itp[ni][nj] = mat_lms2itp[ni][nj];
+            ptr_color_space->mat_itp2lms[ni][nj] = mat_itp2lms[ni][nj];
+        }
+    /* RGB <-> LMS is built from the XYZ <-> LMS constants and the RGB <-> XYZ matrices computed above */
+    mat_mul3x3(mat_xyz2lms, ptr_color_space->mat_rgb2xyz, ptr_color_space->mat_rgb2lms);
+    mat_mul3x3(ptr_color_space->mat_xyz2rgb, mat_lms2xyz, ptr_color_space->mat_lms2rgb);
+
+    ptr_color_space->cct = cs_xy_to_cct(&ptr_color_space->rgbw_xy[6]);
+    /* NOTE(review): hlg_beta divides by luminance_limits[1] and by hlg_system_gamma - confirm neither can be zero */
+    ptr_color_space->hlg_system_gamma = cs_hlg_system_gamma(ptr_color_space->luminance_limits[1]);
+    ptr_color_space->hlg_beta = mat_sqrt(3.0 * mat_pow(ptr_color_space->luminance_limits[0] /
+        ptr_color_space->luminance_limits[1], 1.0 / ptr_color_space->hlg_system_gamma));
+}
+
+void cs_copy(struct s_color_space *ptr_color_space_src, struct s_color_space *ptr_color_space_dst)
+{    /* member-by-member copy; NOTE(review): luma_limits, hlg_system_gamma and hlg_beta are not copied - confirm intended */
+    int row, col;
+
+    for (row = 0; row < 3; row++)
+        for (col = 0; col < 3; col++) {
+            ptr_color_space_dst->mat_rgb2xyz[row][col] = ptr_color_space_src->mat_rgb2xyz[row][col];
+            ptr_color_space_dst->mat_xyz2rgb[row][col] = ptr_color_space_src->mat_xyz2rgb[row][col];
+            ptr_color_space_dst->mat_chad[row][col] = ptr_color_space_src->mat_chad[row][col];
+            ptr_color_space_dst->mat_rgb2lms[row][col] = ptr_color_space_src->mat_rgb2lms[row][col];
+            ptr_color_space_dst->mat_lms2rgb[row][col] = ptr_color_space_src->mat_lms2rgb[row][col];
+            ptr_color_space_dst->mat_lms2itp[row][col] = ptr_color_space_src->mat_lms2itp[row][col];
+            ptr_color_space_dst->mat_itp2lms[row][col] = ptr_color_space_src->mat_itp2lms[row][col];
+        }
+    for (row = 0; row < 3; row++) {
+        ptr_color_space_dst->luminance_limits[row] = ptr_color_space_src->luminance_limits[row];
+        ptr_color_space_dst->white_xyz[row] = ptr_color_space_src->white_xyz[row];
+    }
+    for (row = 0; row < 8; row++)
+        ptr_color_space_dst->rgbw_xy[row] = ptr_color_space_src->rgbw_xy[row];
+    for (row = 0; row < 4; row++)
+        ptr_color_space_dst->gamma_parm[row] = ptr_color_space_src->gamma_parm[row];
+    ptr_color_space_dst->cct = ptr_color_space_src->cct;
+    ptr_color_space_dst->pq_norm = ptr_color_space_src->pq_norm;
+    ptr_color_space_dst->mode = ptr_color_space_src->mode;
+    ptr_color_space_dst->gamma_type = ptr_color_space_src->gamma_type;
+    ptr_color_space_dst->color_space_type = ptr_color_space_src->color_space_type;
+}
+
+void cs_luminance_to_luma_limits(MATFLOAT luminance_limits[2], MATFLOAT luma_limits[3])
+{    /* runs both limits through cs_gamma_pq() (linear to non-linear); luma_limits[2] is upper minus lower */
+    for (int k = 0; k < 2; k++)
+        luma_limits[k] = cs_gamma_pq(luminance_limits[k], EGD_LIN_2_NONLIN);
+    luma_limits[2] = luma_limits[1] - luma_limits[0];
+}
+
+void cs_xyy_to_xyz(MATFLOAT xyy_inp[3], MATFLOAT xyz_out[3])
+{    /* output may be the same as input: the input is first copied to a local with mat_copy() */
+    MATFLOAT src[3];    /* src[0] = x, src[1] = y, src[2] = Y */
+
+    mat_copy(xyy_inp, src, 3);
+    xyz_out[1] = src[2];
+    xyz_out[0] = !(src[1] > 0.0) ? 0.0 : src[2] * src[0] / src[1];
+    xyz_out[2] = !(src[1] > 0.0) ? 0.0 : src[2] * (1.0 - src[0] - src[1]) / src[1];
+}
+
+void cs_xyz_to_xyy(MATFLOAT xyz_inp[3], MATFLOAT xyy_out[3])
+{    /* output may be the same as input */
+    MATFLOAT sum = xyz_inp[0] + xyz_inp[1] + xyz_inp[2];
+    /* write order [2], [1], [0] matters in place: each element is read before it is overwritten; sum <= 0 gives x = y = 0 */
+    xyy_out[2] = xyz_inp[1];
+    xyy_out[1] = (sum > 0.0) ? xyz_inp[1] / sum : 0.0;
+    xyy_out[0] = (sum > 0.0) ? xyz_inp[0] / sum : 0.0;
+}
+
+void cs_xyzc_to_xyz(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3])
+{    /* output may be the same as input; divides by the component sum so the three outputs add up to 1 */
+    const MATFLOAT total = xyz_inp[0] + xyz_inp[1] + xyz_inp[2];
+    const int has_total = (total > 0.0);
+    xyz_out[0] = has_total ? xyz_inp[0] / total : 0.0;
+    xyz_out[1] = has_total ? xyz_inp[1] / total : 0.0;
+    xyz_out[2] = 1.0 - xyz_out[0] - xyz_out[1];
+}
+
+void cs_xyz_to_xyzc(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3])
+{    /* output may be the same as input: the input is first copied to a local with mat_copy() */
+    MATFLOAT src[3];    /* scaled so the middle component becomes 1.0 */
+
+    mat_copy(xyz_inp, src, 3);
+    xyz_out[1] = 1.0;
+    xyz_out[0] = !(src[1] > 0.0) ? 0.0 : src[0] / src[1];
+    xyz_out[2] = !(src[1] > 0.0) ? 0.0 : src[2] / src[1];
+}
+
+void cs_rgb_to_itp(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3])
+{    /* output may be the same as input */
+    MATFLOAT lms_work[3];
+
+    /* mat_rgb2lms, then per-channel cs_gamma_pq() linear to non-linear, then mat_lms2itp */
+    mat_eval_3x3(ptr_color_space->mat_rgb2lms, rgb_inp, lms_work);
+    for (int ch = 0; ch < 3; ch++)
+        lms_work[ch] = cs_gamma_pq(lms_work[ch], EGD_LIN_2_NONLIN);
+    mat_eval_3x3(ptr_color_space->mat_lms2itp, lms_work, itp_out);
+}
+
+void cs_itp_to_rgb(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3])
+{    /* output may be the same as input */
+    MATFLOAT lms_work[3];
+
+    /* mat_itp2lms, then per-channel cs_gamma_pq() non-linear to linear, then mat_lms2rgb */
+    mat_eval_3x3(ptr_color_space->mat_itp2lms, itp_inp, lms_work);
+    for (int ch = 0; ch < 3; ch++)
+        lms_work[ch] = cs_gamma_pq(lms_work[ch], EGD_NONLIN_2_LIN);
+    mat_eval_3x3(ptr_color_space->mat_lms2rgb, lms_work, rgb_out);
+}
+
+void cs_ich_to_itp(MATFLOAT ich_inp[3], MATFLOAT itp_out[3])
+{    /* polar (slot 1 = radius, slot 2 = angle) to ITP; output must not be the same as input */
+    itp_out[0] = ich_inp[0];    /* slot 0 is copied unchanged */
+    itp_out[1] = ich_inp[1] * mat_cos(ich_inp[2]);    /* would overwrite the radius if aliased */
+    itp_out[2] = ich_inp[1] * mat_sin(ich_inp[2]);    /* reads the radius and angle again */
+}
+
+void cs_itp_to_ich(MATFLOAT itp_inp[3], MATFLOAT ich_out[3])
+{    /* ITP to polar form of (slot 1, slot 2); output must not be the same as input */
+    ich_out[0] = itp_inp[0];    /* slot 0 is copied unchanged */
+    ich_out[1] = mat_radius(itp_inp[2], itp_inp[1]);    /* would overwrite itp_inp[1] if aliased */
+    ich_out[2] = mat_angle(itp_inp[2], itp_inp[1]);    /* reads itp_inp[1] and itp_inp[2] again */
+}
+
+void cs_rgb_to_yuv(MATFLOAT rgb_inp[3], MATFLOAT yuv_out[3])
+{    /* RGB to YCbCr709 from Charles Poynton "Digital Video and HD: Algorithms and Interfaces", p.371 */
+    static MATFLOAT offset_in[3] = { 0.0, 0.0, 0.0 };     /* input-side offset */
+    static MATFLOAT offset_out[3] = { 0.0, 0.5, 0.5 };    /* output-side offset */
+    static MATFLOAT coeff[3][3] = {
+        /*      R            G            B      */
+        {  0.2126,      0.7152,      0.0722     },    /* Y  */
+        { -0.11457211, -0.38542789,  0.5        },    /* Cb */
+        {  0.5,        -0.45415291, -0.04584709 }     /* Cr */
+    };
+
+    mat_eval_off_3x3_off(offset_in, coeff, offset_out, rgb_inp, yuv_out);
+    cs_clamp_rgb(yuv_out, 0.0, 1.0);    /* all three components end up in [0, 1] */
+}
+
+void cs_yuv_to_rgb(MATFLOAT yuv_inp[3], MATFLOAT rgb_out[3])
+{    /* YCbCr709 to RGB from Charles Poynton "Digital Video and HD: Algorithms and Interfaces", p.371 */
+    static MATFLOAT offset_in[3] = { 0.0, -0.5, -0.5 };    /* input-side offset */
+    static MATFLOAT offset_out[3] = { 0.0, 0.0, 0.0 };     /* output-side offset */
+    static MATFLOAT coeff[3][3] = {
+        /*  Y     Cb            Cr          */
+        { 1.0,  0.0,          1.5748      },    /* R */
+        { 1.0, -0.187324273, -0.468124273 },    /* G */
+        { 1.0,  1.8556,       0.0         }     /* B */
+    };
+
+    mat_eval_off_3x3_off(offset_in, coeff, offset_out, yuv_inp, rgb_out);
+    cs_clamp_rgb(rgb_out, 0.0, 1.0);    /* all three components end up in [0, 1] */
+}
+
+void cs_nlin_to_lin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{    /* HLG goes through cs_hlg_eotf() on the whole triplet; every other curve is per channel */
+    if (ptr_color_space->gamma_type != EGT_HLG)
+        for (int ch = 0; ch < 3; ch++)
+            rgb_out[ch] = cs_nlin_to_lin(ptr_color_space, rgb_inp[ch]);
+    else
+        cs_hlg_eotf(rgb_inp, rgb_out, ptr_color_space->luminance_limits,
+            ptr_color_space->hlg_system_gamma, ptr_color_space->hlg_beta);
+}
+
+MATFLOAT cs_nlin_to_lin(struct s_color_space *ptr_color_space, MATFLOAT val_inp)
+{    /* one sample through cs_gamma() in the EGD_NONLIN_2_LIN direction, with range handling */
+    MATFLOAT lin = val_inp;
+
+    if (ptr_color_space->gamma_type != EGT_PQ) {
+        /* SDR encoded signal is normalized to a range [0.0,1.0],
+            where 0.0 mapped to Black (0,0,0) and 1.0 mapped to White (1,1,1) */
+        lin = cs_gamma(lin, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);
+        lin = mat_denorm(lin, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+
+        return mat_clamp(lin, 0.0, 1.0);
+    }
+
+    /* HDR PQ encoded signal is normalized to a range [0.0,1.0],
+        where 0.0 mapped to 0.0 and 1.0 mapped to PQ-1(pq_norm) */
+    if (ptr_color_space->pq_norm > 0.0)
+        lin = mat_denorm(lin, 0.0, ptr_color_space->pq_norm);
+    /* pq_norm <= 0.0 skips the rescale and the sample is used as is */
+
+    /* limit to [0, 1] before the curve is applied */
+    lin = mat_clamp(lin, 0.0, 1.0);
+
+    return cs_gamma(lin, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);
+}
+
+void cs_lin_to_nlin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{    /* HLG goes through cs_hlg_oetf() on the whole triplet; every other curve is per channel */
+    if (ptr_color_space->gamma_type != EGT_HLG)
+        for (int ch = 0; ch < 3; ch++)
+            rgb_out[ch] = cs_lin_to_nlin(ptr_color_space, rgb_inp[ch]);
+    else
+        cs_hlg_oetf(rgb_inp, rgb_out, ptr_color_space->luminance_limits[1], ptr_color_space->hlg_system_gamma);
+}
+
+MATFLOAT cs_lin_to_nlin(struct s_color_space *ptr_color_space, MATFLOAT val_inp)
+{    /* one sample through cs_gamma() in the EGD_LIN_2_NONLIN direction, with range handling */
+    MATFLOAT enc = val_inp;
+
+    if (ptr_color_space->gamma_type != EGT_PQ) {
+        /* SDR encoded signal is normalized to a range [0.0,1.0],
+            where 0.0 mapped to Black (0,0,0) and 1.0 mapped to White (1,1,1) */
+        enc = mat_norm(enc, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+        enc = mat_clamp(enc, 0.0, 1.0);
+
+        return cs_gamma(enc, ptr_color_space->gamma_parm, EGD_LIN_2_NONLIN);
+    }
+
+    /* HDR PQ encoded signal is normalized to a range [0.0,1.0],
+        where 0.0 mapped to 0.0 and 1.0 mapped to PQ-1(pq_norm) */
+    enc = cs_gamma(enc, ptr_color_space->gamma_parm, EGD_LIN_2_NONLIN);
+    if (ptr_color_space->pq_norm > 0.0)
+        enc = mat_norm(enc, 0.0, ptr_color_space->pq_norm);
+    /* pq_norm <= 0.0 skips the rescale */
+
+    return mat_clamp(enc, 0.0, 1.0);
+}
+
+int cs_genmat_rgb_to_xyz(MATFLOAT rgbw[8], MATFLOAT mat_rgb2xyz[3][3])
+{    /* rgbw holds four xy pairs: three primaries at [0..5], white point at [6..7] */
+    MATFLOAT white_xyz[3] = { rgbw[6], rgbw[7], 1.0 };    /* white as xyY with Y = 1 */
+    MATFLOAT mat[3][3], mat_inv[3][3], white_k[3];
+    int ni, nc;
+    int rc;
+
+    for (ni = 0; ni < 3; ni++) {    /* one column per primary, rows are X, Y, Z at Y = 1 */
+        mat[0][ni] = rgbw[2 * ni + 0] / rgbw[2 * ni + 1];    /* x / y; y == 0 is not guarded */
+        mat[1][ni] = 1.0;
+        mat[2][ni] = (1.0 - rgbw[2 * ni + 0] - rgbw[2 * ni + 1]) / rgbw[2 * ni + 1];    /* (1 - x - y) / y */
+    }
+    rc = mat_inv3x3(mat, mat_inv);    /* rc is only returned; mat_inv is used whatever it says */
+    cs_xyy_to_xyz(white_xyz, white_xyz);    /* in place */
+    mat_eval_3x3(mat_inv, white_xyz, white_k);    /* per-primary weights: mat * white_k = white XYZ */
+    for (ni = 0; ni < 3; ni++)
+        for (nc = 0; nc < 3; nc++)
+            mat_rgb2xyz[nc][ni] = white_k[ni] * mat[nc][ni];    /* scale column ni by its weight */
+
+    return rc;
+}
+
+int cs_genmat_xyz_to_rgb(MATFLOAT rgbw_xy[8], MATFLOAT mat_xyz2rgb[3][3])
+{    /* build RGB->XYZ from the chromaticities, then invert it; returns mat_inv3x3()'s result */
+    MATFLOAT forward[3][3];
+
+    (void)cs_genmat_rgb_to_xyz(rgbw_xy, forward);    /* its own status is not propagated */
+    return mat_inv3x3(forward, mat_xyz2rgb);
+}
+
+int cs_genmat_rgb_to_rgb(MATFLOAT rgbw_xy_src[8], MATFLOAT rgbw_xy_dst[8], MATFLOAT mat_rgb2rgb[3][3], int en_chad)
+{    /* mat_rgb2rgb = (XYZ->dst RGB) * [chromatic adaptation] * (src RGB->XYZ) */
+    MATFLOAT src_to_xyz[3][3], xyz_to_dst[3][3];
+    int status;
+
+    (void)cs_genmat_rgb_to_xyz(rgbw_xy_src, src_to_xyz);
+    status = cs_genmat_xyz_to_rgb(rgbw_xy_dst, xyz_to_dst);    /* the only status reported */
+
+    if (en_chad) {    /* Chromatic Adaptation between the two white points */
+        MATFLOAT adapt[3][3], unadapted[3][3];
+
+        cs_genmat_chad(&rgbw_xy_src[6], &rgbw_xy_dst[6], adapt);
+        mat_copy3x3(src_to_xyz, unadapted);
+        mat_mul3x3(adapt, unadapted, src_to_xyz);
+    }
+
+    mat_mul3x3(xyz_to_dst, src_to_xyz, mat_rgb2rgb);
+
+    return status;
+}
+
+int cs_genmat_chad(MATFLOAT white_xy_src[2], MATFLOAT white_xy_dst[2], MATFLOAT mat_chad[3][3])
+{    /* chromatic adaptation matrix between two white points given as xy; always returns 0 */
+    static MATFLOAT mat_bradford[3][3] = {
+        /* Bradford matrix */
+        { 0.8951000,  0.2664000, -0.1614000},
+        {-0.7502000,  1.7135000,  0.0367000},
+        { 0.0389000, -0.0685000,  1.0296000}
+    };
+
+    static MATFLOAT mat_bradford_inv[3][3] = {
+        /* Bradford inverse matrix */
+        { 0.9869929, -0.1470543, 0.1599627},
+        { 0.4323053,  0.5183603, 0.0492912},
+        {-0.0085287,  0.0400428, 0.9684867}
+    };
+
+#if 0    /* Not in use */
+    static MATFLOAT mat_von_kries[3][3] = {
+        /* Von Kries matrix */
+        { 0.4002400, 0.7076000, -0.0808100},
+        {-0.2263000, 1.1653200,  0.0457000},
+        { 0.0000000, 0.0000000,  0.9182200}
+    };
+
+    static MATFLOAT mat_von_kries_inv[3][3] = {
+        /* Von Kries inverse matrix */
+        {1.8599364, -1.1293816,  0.2198974},
+        {0.3611914,  0.6388125, -0.0000064},
+        {0.0000000,  0.0000000,  1.0890636}
+    };
+#endif
+
+    MATFLOAT vec_white_xyz_src[3] = { white_xy_src[0], white_xy_src[1], 1.0 };    /* xyY with Y = 1 */
+    MATFLOAT vec_white_xyz_dst[3] = { white_xy_dst[0], white_xy_dst[1], 1.0 };    /* xyY with Y = 1 */
+    MATFLOAT vec_lms[3][3];    /* despite the name this is a 3x3 diagonal scale matrix */
+    MATFLOAT rgb_src[3], rgb_dst[3];    /* both whites after the Bradford matrix */
+    MATFLOAT mat_tmp[3][3];
+    int nc;
+
+    /* convert to XYZ */
+    cs_xyy_to_xyz(vec_white_xyz_src, vec_white_xyz_src);
+    cs_xyy_to_xyz(vec_white_xyz_dst, vec_white_xyz_dst);
+    /* generate scales */
+    mat_3x3_unity(vec_lms);
+    mat_eval_3x3(mat_bradford, vec_white_xyz_src, rgb_src);
+    mat_eval_3x3(mat_bradford, vec_white_xyz_dst, rgb_dst);
+    for (nc = 0; nc < 3; nc++)
+        vec_lms[nc][nc] = rgb_dst[nc] / rgb_src[nc];    /* rgb_src[nc] == 0 is not guarded */
+    /* mat_chad = mat_bradford_inv * vec_lms * mat_bradford */
+    mat_mul3x3(vec_lms, mat_bradford, mat_tmp);
+    mat_mul3x3(mat_bradford_inv, mat_tmp, mat_chad);
+
+    return 0;
+}
+
+MATFLOAT cs_gamma(MATFLOAT val, MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir)
+{
+    /* gamma_parm[0] doubles as a curve selector: exactly 0.0 dispatches to
+        cs_gamma_pq() and exactly 0.5 to cs_gamma_hlg(); any other value is
+        taken as the first coefficient of the piecewise curve below */
+    if (gamma_parm[0] == 0.0)
+        return cs_gamma_pq(val, gamma_dir);
+    if (gamma_parm[0] == 0.5)
+        return cs_gamma_hlg(val, gamma_dir);
+
+    /* piecewise curve: straight segment below the knee, offset power law above it */
+    const MATFLOAT gain = gamma_parm[0];
+    const MATFLOAT exponent = gamma_parm[1];
+    const MATFLOAT slope = gamma_parm[2];
+    const MATFLOAT knee = gamma_parm[3];
+
+    if (gamma_dir != EGD_LIN_2_NONLIN)    /* inverse: the knee is compared as knee * slope */
+        return (val < knee * slope) ? val / slope : mat_pow((val + gain - 1.0) / gain, 1.0 / exponent);
+
+    /* forward (EGD_LIN_2_NONLIN) */
+    return (val < knee) ? val * slope : gain * mat_pow(val, exponent) + 1.0 - gain;
+}
+
+/* R_REC-BT.2100-2-2 Table 4 */
+/* input must be in a range [0,1] normalized to [0,10000]cd/m^2 in linear or non-linear space */
+/* output must be in a range [0,1] normalized to [0,10000]cd/m^2 in linear or non-linear space */
+MATFLOAT cs_gamma_pq(MATFLOAT val, enum cs_gamma_dir gamma_dir)
+{
+    static const MATFLOAT s_m1 = 0.1593017578125;
+    static const MATFLOAT s_m2 = 78.84375;
+    static const MATFLOAT s_c1 = 0.8359375;
+    static const MATFLOAT s_c2 = 18.8515625;
+    static const MATFLOAT s_c3 = 18.6875;
+
+    MATFLOAT sign = (val < 0.0) ? -1.0 : 1.0;    /* the curve runs on |val|; the sign is put back at the end */
+    MATFLOAT val_out = MAT_ABS(val);
+    MATFLOAT t1, t2, t;
+
+    if (gamma_dir == EGD_LIN_2_NONLIN) { /* linear to PQ */
+        MATFLOAT x = mat_pow(val_out, s_m1);
+
+        t1 = (s_c2 * x) + s_c1;
+        t2 = 1.0 + (s_c3 * x);
+        t = t1 / t2;
+        val_out = mat_pow(t, s_m2);
+    } else { /* PQ to linear */
+        MATFLOAT np = mat_pow(val_out, 1.0 / s_m2);
+
+        t1 = np - s_c1;
+        t1 = MAT_MAX(t1, 0.0);    /* keeps the numerator non-negative */
+        t2 = s_c2 - (s_c3 * np);    /* NOTE(review): no guard for t2 <= 0, which needs np >= s_c2 / s_c3 (> 1) */
+        t = t1 / t2;
+        val_out = mat_pow(t, 1.0 / s_m1);
+    }
+    val_out *= sign;
+
+    return val_out;
+}
+
+/* EOTF 1886 */
+/* input must be in a range [0,1] normalized to [Lb,Lw]cd/m^2 in non-linear space */
+/* output must be in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* lb in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* lw in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+MATFLOAT cs_gamma_1886(MATFLOAT val, MATFLOAT lb, MATFLOAT lw, MATFLOAT gamma)
+{
+    const MATFLOAT black_nl = mat_pow(lb, 1.0 / gamma);
+    const MATFLOAT white_nl = mat_pow(lw, 1.0 / gamma);
+    const MATFLOAT gain = mat_pow(white_nl - black_nl, gamma);
+    const MATFLOAT lift = black_nl / (white_nl - black_nl);    /* lw == lb is not guarded */
+
+    return gain * mat_pow(MAT_MAX(val + lift, 0.0), gamma);
+}
+
+/* rgb_inp[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+void cs_pq_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{
+    /* per channel: scale by 59.5208, OETF 709, then EOTF 1886 (power 2.4) divided by 100 */
+    for (int ch = 0; ch < 3; ch++) {
+        const MATFLOAT scene = rgb_inp[ch] * 59.5208;
+        const MATFLOAT video = (scene <= 0.018) ? 4.5 * scene : 1.099 * mat_pow(scene, 0.45) - 0.099;
+        const MATFLOAT display = mat_pow(video, 2.4) / 100.0;
+
+        rgb_out[ch] = MAT_CLAMP(display, 0.0, 1.0);
+
+    }
+}
+
+/* BT.2390 display referred */
+/* rgb_inp[] in a range [0,1] normalized to [0,100]cd/m^2 in non-linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in non-linear space */
+void cs_sdr_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020)
+{    /* en_709_2020 is a MATFLOAT tested as a flag: any non-zero value applies cs_mat_709_2020 */
+    MATFLOAT sdr_lb = 0.0;
+    MATFLOAT sdr_lw = 100.0 / CS_MAX_LUMINANCE;
+    MATFLOAT sdr_gamma = 2.4;
+    MATFLOAT scale = 2.0;    /* fixed gain applied in linear light */
+    MATFLOAT rgb_lin[3];
+    int nc;
+
+    for (nc = 0; nc < 3; nc++)
+        rgb_lin[nc] = cs_gamma_1886(rgb_inp[nc], sdr_lb, sdr_lw, sdr_gamma); /* [0,10000]cd/m^2 */
+
+    if (en_709_2020) {
+        MATFLOAT rgb_tmp[3];
+
+        mat_copy(rgb_lin, rgb_tmp, 3);    /* separate source buffer for the matrix evaluation */
+        mat_eval_3x3(cs_mat_709_2020, rgb_tmp, rgb_lin); /* [0,10000]cd/m^2 */
+    }
+
+    for (nc = 0; nc < 3; nc++)
+        rgb_lin[nc] = rgb_lin[nc] * scale; /* scale to 200cd/m^2 */
+
+    cs_gamma_rgb(rgb_lin, rgb_out, (MATFLOAT *)cs_get_gamma(EGT_PQ), EGD_LIN_2_NONLIN); /* [0,10000]cd/m^2 */
+}
+
+void cs_gamma_rgb(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir)
+{    /* output may be the same as input */
+    /* cs_gamma() per channel; each slot is read before that same slot is written */
+    rgb_out[0] = cs_gamma(rgb_inp[0], gamma_parm, gamma_dir);
+    rgb_out[1] = cs_gamma(rgb_inp[1], gamma_parm, gamma_dir);
+    rgb_out[2] = cs_gamma(rgb_inp[2], gamma_parm, gamma_dir);
+}
+
+int cs_min_rgb(MATFLOAT rgb[3], MATFLOAT val_min)
+{    /* raise every channel to at least val_min in place; returns 1 if any channel changed */
+    int changed = 0;
+    int ch;
+
+    for (ch = 0; ch < 3; ch++) {
+        const MATFLOAT before = rgb[ch];
+
+        rgb[ch] = MAT_MAX(before, val_min);
+        if (rgb[ch] != before)
+            changed = 1;
+    }
+    return changed;
+}
+
+int cs_max_rgb(MATFLOAT rgb[3], MATFLOAT val_max)
+{    /* lower every channel to at most val_max in place; returns 1 if any channel changed */
+    int changed = 0;
+    int ch;
+
+    for (ch = 0; ch < 3; ch++) {
+        const MATFLOAT before = rgb[ch];
+
+        rgb[ch] = MAT_MIN(before, val_max);
+        if (rgb[ch] != before)
+            changed = 1;
+    }
+    return changed;
+}
+
+int cs_is_valid_ic(struct s_color_space *ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2])
+{    /* rebuild an ITP point from pnt_ic and the hue factors, then defer to cs_is_valid_itp() */
+    const MATFLOAT first = pnt_ic[0], second = pnt_ic[1];
+    MATFLOAT itp[3];
+
+    itp[2] = second * hue_sin_cos[0];
+    itp[1] = second * hue_sin_cos[1];
+    itp[0] = first;
+    return cs_is_valid_itp(ptr_color_space, itp);
+}
+
+int cs_is_valid_itp(struct s_color_space *ptr_color_space, MATFLOAT itp[3])
+{    /* converts to RGB and range-checks it against luminance_limits[0] and [1] */
+    MATFLOAT converted[3];
+
+    cs_itp_to_rgb(ptr_color_space, itp, converted);
+    return cs_is_valid_rgb(converted, ptr_color_space->luminance_limits[0],
+        ptr_color_space->luminance_limits[1]);
+}
+
+int cs_is_valid_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max)
+{    /* 3-element front end for mat_is_valid_vec(); its result is returned unchanged */
+    return mat_is_valid_vec(rgb, 3, val_min, val_max);
+}
+
+int cs_clip_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max)
+{    /* clamps in place only when cs_is_valid_rgb() reports 0; returns 1 if it clamped, else 0 */
+    const int in_range = cs_is_valid_rgb(rgb, val_min, val_max);
+
+    if (in_range != 0)
+        return 0;
+    cs_clamp_rgb(rgb, val_min, val_max);
+    return 1;
+}
+
+void cs_clamp_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max)
+{    /* in-place mat_clamp() of all three channels with shared bounds */
+    int ch = 3;
+
+    while (ch-- > 0)
+        rgb[ch] = mat_clamp(rgb[ch], val_min, val_max);
+}
+
+void cs_norm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng)
+{    /* in-place mat_norm() of all three channels with a shared (val_min, val_rng) */
+    MATFLOAT *const px = rgb;
+    px[0] = mat_norm(px[0], val_min, val_rng);
+    px[1] = mat_norm(px[1], val_min, val_rng);
+    px[2] = mat_norm(px[2], val_min, val_rng);
+}
+
+void cs_denorm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng)
+{    /* in-place mat_denorm() of all three channels with a shared (val_min, val_rng) */
+    int ch = 3;
+
+    while (ch-- > 0)
+        rgb[ch] = mat_denorm(rgb[ch], val_min, val_rng);
+}
+
+void cs_int2flt_rgb(int rgb_inp[3], MATFLOAT rgb_out[3], int val_max)
+{    /* mat_int2flt() on each of the three integer channels, val_max passed through */
+    const int *src = rgb_inp;
+    MATFLOAT *dst = rgb_out;
+    while (dst < rgb_out + 3)
+        *dst++ = mat_int2flt(*src++, val_max);
+}
+
+void cs_flt2int_rgb(MATFLOAT rgb_inp[3], int rgb_out[3], int val_max)
+{    /* mat_flt2int() on each of the three channels, val_max passed through */
+    /* written out per channel instead of looping */
+    rgb_out[0] = mat_flt2int(rgb_inp[0], val_max);
+    rgb_out[1] = mat_flt2int(rgb_inp[1], val_max);
+    rgb_out[2] = mat_flt2int(rgb_inp[2], val_max);
+}
+
+
+void cs_short2flt_rgb(unsigned short rgb_inp[3], MATFLOAT rgb_out[3], int val_max)
+{    /* same as cs_int2flt_rgb() but for unsigned short channels */
+    int ch = 0;
+    do {
+        rgb_out[ch] = mat_int2flt(rgb_inp[ch], val_max);
+    } while (++ch < 3);
+}
+
+void cs_flt2short_rgb(MATFLOAT rgb_inp[3], unsigned short rgb_out[3], int val_max)
+{    /* mat_flt2int() on each channel, stored as unsigned short */
+    /* the casts spell out the narrowing the plain assignment performed implicitly */
+    rgb_out[0] = (unsigned short)mat_flt2int(rgb_inp[0], val_max);
+    rgb_out[1] = (unsigned short)mat_flt2int(rgb_inp[1], val_max);
+    rgb_out[2] = (unsigned short)mat_flt2int(rgb_inp[2], val_max);
+}
+
+void cs_genprim_itp(struct s_color_space *ptr_color_space, int num_prim,
+        MATFLOAT *ptr_prim_rgb, MATFLOAT *ptr_prim_ich)
+{    /* input: num_prim packed RGB triplets; output: three planes of num_prim values each */
+    for (int prim = 0; prim < num_prim; prim++) {
+        MATFLOAT sample[3], itp[3], ich[3];
+
+        mat_copy(&ptr_prim_rgb[3 * prim], sample, 3);
+        cs_denorm_rgb(sample, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+        cs_rgb_to_itp(ptr_color_space, sample, itp);
+        cs_itp_to_ich(itp, ich);
+
+        ptr_prim_ich[num_prim * 0 + prim] = ich[0];
+        ptr_prim_ich[num_prim * 1 + prim] = ich[1];
+        ptr_prim_ich[num_prim * 2 + prim] = ich[2];
+    }
+}
+
+MATFLOAT cs_soft_clip(MATFLOAT val, MATFLOAT limits_src[3], MATFLOAT limits_dst[3])
+{    /* Based on BT.2390 - Src must be wider than Dst */
+    const MATFLOAT epsilon = 0.000001;
+    MATFLOAT val_min = (limits_dst[0] - limits_src[0]) / (limits_src[1] - limits_src[0]);    /* dst low end in src units */
+    MATFLOAT val_max = (limits_dst[1] - limits_src[0]) / (limits_src[1] - limits_src[0]);    /* dst high end in src units */
+    MATFLOAT ks = (1.5 * val_max) - 0.5;    /* knee start */
+    MATFLOAT e0, e1, e2, e3, e4;
+
+    /* Input value is normalized to [0.0,1.0] with limits_src[0] and limits_src[2] */
+    e0 = val;
+    e1 = mat_norm(e0, limits_src[0], limits_src[2]);
+    e1 = mat_clamp(e1, 0.0, 1.0);
+
+    if (e1 < ks)
+        e2 = e1;    /* below the knee the value passes through */
+    else {
+        MATFLOAT t = ((1.0 - ks) <= epsilon) ? (e1 - ks) : ((e1 - ks) / (1.0 - ks));    /* epsilon avoids a near-zero divisor */
+        MATFLOAT t2 = t * t;
+        MATFLOAT t3 = t2 * t;
+
+        e2 = (((2.0 * t3) - (3.0 * t2) + 1.0) * ks) + ((t3 - (2.0 * t2) + t) * (1.0 - ks)) + (((-2.0 * t3) +
+            (3.0 * t2)) * val_max);
+    }
+    e3 = e2 + val_min * mat_pow((1.0 - e2), 4.0);    /* black-level lift */
+
+    /* Output value is denormalized with the same source limits and clamped to [limits_src[0], limits_src[1]] */
+    e4 = mat_denorm(e3, limits_src[0], limits_src[2]);
+    e4 = mat_clamp(e4, limits_src[0], limits_src[1]);
+
+    return e4;
+}
+
+MATFLOAT cs_gamma_to_gamma(MATFLOAT val, enum cs_gamma_type gamma_type_src, enum cs_gamma_type gamma_type_dst,
+    MATFLOAT luminance_limits_dst[3], MATFLOAT luma_limits_src[3], MATFLOAT luma_limits_dst[3],
+    MATFLOAT(*func_pq_to_pq)(MATFLOAT), int en_norm, int en_soft_clip)
+{    /* src curve -> linear -> PQ -> caller transform -> optional soft clip -> linear -> dst curve */
+    MATFLOAT val_out = cs_gamma(val, (MATFLOAT *)cs_get_gamma(gamma_type_src), EGD_NONLIN_2_LIN);    /* degamma */
+
+    if (en_norm)
+        val_out = mat_denorm(val_out, luminance_limits_dst[0], luminance_limits_dst[2]);/* denorm */
+    val_out = mat_clamp(val_out, luminance_limits_dst[0], luminance_limits_dst[1]);        /* clamp */
+    val_out = cs_gamma_pq(val_out, EGD_LIN_2_NONLIN);    /* LIN2PQ */
+    val_out = func_pq_to_pq(val_out);                    /* PQ2PQ transform; the pointer is not NULL-checked */
+    if (en_soft_clip)
+        val_out = cs_soft_clip(val_out, luma_limits_src, luma_limits_dst);    /* SoftClip */
+    val_out = cs_gamma_pq(val_out, EGD_NONLIN_2_LIN);    /* PQ2LIN */
+    if (en_norm)
+        val_out = mat_norm(val_out, luminance_limits_dst[0], luminance_limits_dst[2]);    /* norm */
+    val_out = mat_clamp(val_out, 0.0, 1.0);            /* clamp */
+    val_out = cs_gamma(val_out, (MATFLOAT *)cs_get_gamma(gamma_type_dst), EGD_LIN_2_NONLIN);    /* regamma */
+
+    return val_out;
+}
+
+int cs_xy_to_cct(MATFLOAT xy[2])
+{ /* McCamy's polynomial formula for CCT, epicentre (0.3320, 0.1858); xy[1] == 0.1858 is not guarded */
+    MATFLOAT val = (xy[0] - 0.3320) / (xy[1] - 0.1858);    /* negated McCamy n, hence the odd-term signs */
+    MATFLOAT val2 = val * val;
+    MATFLOAT val3 = val * val2;
+    MATFLOAT cct = -449.0 * val3 + 3525.0 * val2 - 6823.3 * val + 5520.33;    /* linear term is 6823.3 */
+
+    return MAT_ROUND(cct);
+}
+
+void cs_cct_to_xy(int cct, MATFLOAT xy[2])
+{    /* linear interpolation in cs_vec_cct_xy, read here as interleaved x,y pairs */
+    /* cct is first clamped to [CS_CCT_MIN, CS_CCT_MAX]; entries are CS_CCT_INC apart */
+    const int offset = MAT_CLAMP(cct, CS_CCT_MIN, CS_CCT_MAX) - CS_CCT_MIN;
+    const int lo = offset / CS_CCT_INC;
+    const int hi = MAT_MIN(lo + 1, CS_CCT_SIZE - 1);    /* the last entry is its own neighbour */
+    const MATFLOAT frac = (MATFLOAT)(offset - lo * CS_CCT_INC) / (MATFLOAT)CS_CCT_INC;
+    MATFLOAT pair_x[2], pair_y[2];
+
+    pair_x[0] = cs_vec_cct_xy[2 * lo + 0];
+    pair_y[0] = cs_vec_cct_xy[2 * lo + 1];
+
+    pair_x[1] = cs_vec_cct_xy[2 * hi + 0];
+    pair_y[1] = cs_vec_cct_xy[2 * hi + 1];
+
+    /* blend between the two bracketing entries */
+    xy[0] = mat_linear(pair_x, frac);
+    xy[1] = mat_linear(pair_y, frac);
+}
+
+void cs_csc(struct s_color_space *ptr_cs_src, struct s_color_space *ptr_cs_dst,
+    MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], int en_chad)
+{    /* linearise with the source space, remap, clamp to [0,1], re-encode with the destination space */
+    MATFLOAT gamut_map[3][3];
+    MATFLOAT lin[3];
+
+    /* the remap matrix is rebuilt on every call and its status code is not checked */
+    (void)cs_genmat_rgb_to_rgb(ptr_cs_src->rgbw_xy, ptr_cs_dst->rgbw_xy, gamut_map, en_chad);
+    cs_nlin_to_lin_rgb(ptr_cs_src, rgb_inp, lin);
+    mat_eval_3x3(gamut_map, lin, rgb_out);
+    cs_clamp_rgb(rgb_out, 0.0, 1.0);
+    cs_lin_to_nlin_rgb(ptr_cs_dst, rgb_out, rgb_out);
+}
+
+int cs_is_space(struct s_color_space *ptr_color_space,
+    enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type)
+{    /* 1 only when both the color space type and the gamma type match, otherwise 0 */
+    const int same_space = (ptr_color_space->color_space_type == color_space_type);
+    return (same_space && (ptr_color_space->gamma_type == gamma_type)) ? 1 : 0;
+}
+
+void cs_init_type(MATFLOAT luminance_limits[2],
+    enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type,
+    struct s_color_space *ptr_color_space)
+{    /* cs_init() front end: the caller supplies only the types and two luminance limits */
+    struct s_cs_opts opts = {0};    /* every other option starts out zeroed */
+
+    opts.luminance_limits[1] = luminance_limits[1];
+    opts.luminance_limits[0] = luminance_limits[0];
+    opts.pq_norm = 0.0;
+    opts.mode = 0;
+    opts.gamma_type = gamma_type;
+    opts.color_space_type = color_space_type;
+
+    cs_init(&opts, ptr_color_space);
+}
+
+void cs_init_BT709(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space)
+{    /* shorthand for cs_init_type() with ECST_709 and EGT_709 */
+    cs_init_type(luminance_limits, ECST_709, EGT_709, ptr_color_space);
+}
+
+void cs_init_BT2100(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space)
+{    /* shorthand for cs_init_type() with ECST_BT2020 and EGT_PQ */
+    cs_init_type(luminance_limits, ECST_BT2020, EGT_PQ, ptr_color_space);
+}
+
+void cs_rgb_to_ycbcr2020(MATFLOAT rgb_inp[3], MATFLOAT ycbcr_out[3])
+{    /* ITU-R BT.2020; Y is stored first and rgb_inp[0] is read afterwards, so do not alias the arrays */
+    ycbcr_out[0] = 0.2627 * rgb_inp[0] + 0.678 * rgb_inp[1] + 0.0593 * rgb_inp[2];
+    ycbcr_out[1] = (rgb_inp[2] - ycbcr_out[0]) / 1.8814;    /* Cb from B - Y */
+    ycbcr_out[2] = (rgb_inp[0] - ycbcr_out[0]) / 1.4746;    /* Cr from R - Y */
+}
+
+/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+MATFLOAT cs_ootf_gamma_peak(MATFLOAT gamma, MATFLOAT luminance_peak)
+{    /* gamma correction for peak luminance of the display: factor 1.111 per doubling of peak */
+    return mat_pow(1.111, mat_log2(luminance_peak / 0.1)) * gamma;    /* 0.1 = 1000 nits, normalized */
+}
+
+/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+/* luminance_ambient in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - ambient light in linear space */
+MATFLOAT cs_ootf_gamma_amb(MATFLOAT gamma, MATFLOAT luminance_ambient)
+{    /* gamma correction for ambient light: factor 0.98 per doubling of ambient */
+    return mat_pow(0.98, mat_log2(luminance_ambient / 0.0005)) * gamma;    /* 0.0005 = 5 nits, normalized */
+}
+
+MATFLOAT cs_gamma_adjust_sdr(MATFLOAT gamma, MATFLOAT luminance_peak)
+{    /* NOTE(review): the first two branches do not meet at luminance_peak == 0.1 - confirm intent */
+    /* gamma correction for peak luminance of the display */
+    if (luminance_peak <= 0.1)
+        gamma = gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.01));
+    else if (luminance_peak < 0.2)    /* > 0.1 already follows from the failed test above */
+        gamma = gamma + 0.42 * mat_log10(luminance_peak / 0.1);
+    else
+        gamma = gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.1));
+
+    return gamma;
+}
+
+void cs_chad_gains(MATFLOAT rgbw_xy[8], MATFLOAT w_xy[2], MATFLOAT rgb_gain[3])
+{    /* RGB (1,1,1) pushed through RGB->XYZ, adaptation to w_xy and XYZ->RGB, then scaled so the largest is 1 */
+    MATFLOAT unit_rgb[3] = { 1.0, 1.0, 1.0 };
+    MATFLOAT to_xyz[3][3], from_xyz[3][3], adapt[3][3];
+    MATFLOAT white_src[3], white_dst[3];
+    MATFLOAT peak = 0.0;
+
+    /* generate RGB to XYZ and back transformation matrices */
+    cs_genmat_rgb_to_xyz(rgbw_xy, to_xyz);
+    mat_inv3x3(to_xyz, from_xyz);
+    /* generate matrix of white point conversion from display to target */
+    cs_genmat_chad(&rgbw_xy[6], w_xy, adapt);
+
+    /* map white to gains */
+    mat_eval_3x3(to_xyz, unit_rgb, white_src);
+    mat_eval_3x3(adapt, white_src, white_dst);
+    mat_eval_3x3(from_xyz, white_dst, rgb_gain);
+
+    /* normalize gains to max */
+    for (int ch = 0; ch < 3; ch++)
+        peak = MAT_MAX(peak, rgb_gain[ch]);
+    for (int ch = 0; ch < 3; ch++)
+        rgb_gain[ch] = rgb_gain[ch] / peak;
+}
+
+void cs_genmat_cct(struct s_color_space *ptr_cs, int cct_shift, int norm, MATFLOAT mat_cct[3][3])
+{    /* RGB->RGB matrix that adapts the space's white to the white of (ptr_cs->cct + cct_shift) */
+    MATFLOAT xy[2];
+    MATFLOAT mat_chad[3][3];
+    MATFLOAT mat_tmp[3][3];
+
+    cs_cct_to_xy(ptr_cs->cct + cct_shift, xy);    /* target white chromaticity */
+    cs_genmat_chad(&ptr_cs->rgbw_xy[6], xy, mat_chad);
+    mat_mul3x3(mat_chad, ptr_cs->mat_rgb2xyz, mat_tmp);
+    mat_mul3x3(ptr_cs->mat_xyz2rgb, mat_tmp, mat_cct);    /* mat_cct = xyz2rgb * chad * rgb2xyz */
+
+    if (norm) {    /* optional: scale so the largest channel of mapped white is 1 */
+        MATFLOAT rgb_white[3] = { 1.0, 1.0, 1.0 };
+        MATFLOAT max_gain = 0.0;
+        MATFLOAT rgb_gain[3];
+        int nc, ni;
+
+        mat_eval_3x3(mat_cct, rgb_white, rgb_gain);
+        for (nc = 0; nc < 3; nc++)
+            max_gain = MAT_MAX(max_gain, rgb_gain[nc]);
+        for (nc = 0; nc < 3; nc++)
+            for (ni = 0; ni < 3; ni++)
+                mat_cct[nc][ni] = mat_cct[nc][ni] / max_gain;    /* max_gain == 0 is not guarded */
+    }
+}
+
+int cs_rgb_to_vsh(MATFLOAT rgb[3], MATFLOAT vsh[3])
+{    /* vsh = { value, saturation, hue in radians }; returns 1 when the hue is undefined, else 0 */
+    MATFLOAT r = rgb[0];
+    MATFLOAT g = rgb[1];
+    MATFLOAT b = rgb[2];
+    MATFLOAT val_min, val_max, delta;
+
+    val_max = (g > b) ? g : b;
+    if (r > val_max)
+        val_max = r;
+
+    val_min = (g < b) ? g : b;
+    if (r < val_min)
+        val_min = r;
+
+    vsh[0] = val_max;    /* value is the largest channel */
+    delta = val_max - val_min;
+
+    if ((val_max != 0.0) && (delta != 0.0))
+        vsh[1] = delta / val_max;
+    else {    /* black or grey: saturation and hue are both reported as 0 */
+        vsh[2] = 0.0;
+        vsh[1] = 0.0;
+        return 1;
+    }
+
+    if (r == val_max)    /* exact compare is safe: val_max was copied from one of r, g, b */
+        vsh[2] = (g - b) / delta;
+    else if (g == val_max)
+        vsh[2] = 2.0 + (b - r) / delta;
+    else
+        vsh[2] = 4.0 + (r - g) / delta;
+
+    vsh[2] = vsh[2] * mat_get_pi() / 3.0;    /* sextant units to radians */
+    vsh[2] = mat_norm_angle(vsh[2]);    /* [0.0, 2PI) */
+
+    return 0;
+}
+
+void cs_vsh_to_rgb(MATFLOAT vsh[3], MATFLOAT rgb[3])
+{    /* inverse of cs_rgb_to_vsh(): vsh = { value, saturation, hue in radians } */
+    MATFLOAT v = vsh[0];
+    MATFLOAT s = vsh[1];
+
+    MATFLOAT r = v;    /* with s <= 0 the result stays grey: r = g = b = v */
+    MATFLOAT g = v;
+    MATFLOAT b = v;
+
+    if (s > 0.0) {
+        MATFLOAT h = 3.0 * vsh[2] / mat_get_pi();    /* hue in sextant units */
+        int ni = MAT_CLAMP((int)h, 0, 5);    /* sextant index, forced into 0..5 */
+        MATFLOAT f = h - (MATFLOAT)ni;    /* position inside the sextant */
+        MATFLOAT p = v * (1.0 - s);
+        MATFLOAT q = v * (1.0 - s * f);
+        MATFLOAT t = v * (1.0 - s * (1.0 - f));
+
+        switch (ni) {    /* no default case: ni is clamped to 0..5 above */
+        case 0:
+            r = v;
+            g = t;
+            b = p;
+            break;
+        case 1:
+            r = q;
+            g = v;
+            b = p;
+            break;
+        case 2:
+            r = p;
+            g = v;
+            b = t;
+            break;
+        case 3:
+            r = p;
+            g = q;
+            b = v;
+            break;
+        case 4:
+            r = t;
+            g = p;
+            b = v;
+            break;
+        case 5:
+            r = v;
+            g = p;
+            b = q;
+            break;
+        }
+    }
+
+    rgb[0] = r;
+    rgb[1] = g;
+    rgb[2] = b;
+}
+
+/* YUV functions */
+void cs_yuv_to_ysh(MATFLOAT yuv_inp[3], MATFLOAT ysh_out[3])
+{    /* polar form of slots 1 and 2 about the (0.5, 0.5) centre; do not pass the same array twice */
+    ysh_out[0] = yuv_inp[0];    /* slot 0 is copied unchanged */
+    ysh_out[1] = mat_radius(yuv_inp[2] - 0.5, yuv_inp[1] - 0.5);    /* writes slot 1 ... */
+    ysh_out[2] = mat_angle(yuv_inp[2] - 0.5, yuv_inp[1] - 0.5);    /* ... which is read again here */
+}
+
+void cs_ysh_to_yuv(MATFLOAT ysh_inp[3], MATFLOAT yuv_out[3])
+{    /* polar back to slots 1 and 2 centred on 0.5; do not pass the same array twice */
+    yuv_out[0] = ysh_inp[0];    /* slot 0 is copied unchanged */
+    yuv_out[1] = ysh_inp[1] * mat_cos(ysh_inp[2]) + 0.5;    /* writes slot 1 ... */
+    yuv_out[2] = ysh_inp[1] * mat_sin(ysh_inp[2]) + 0.5;    /* ... which is read again here */
+}
+
+/* CIE LAB functions */
+void cs_rgb_to_lab(MATFLOAT rgb[3], MATFLOAT lab[3], struct s_color_space *ptr_color_space)
+{    /* encoded RGB -> linear RGB -> XYZ -> Lab relative to ptr_color_space->white_xyz */
+    MATFLOAT rgb_lin[3], xyz[3];    /* linearise into a local so the caller's rgb[] is left untouched */
+
+    cs_gamma_rgb(rgb, rgb_lin, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);
+    mat_eval_3x3(ptr_color_space->mat_rgb2xyz, rgb_lin, xyz);
+    cs_xyz_to_lab(xyz, lab, ptr_color_space->white_xyz);
+}
+
+void cs_lab_to_rgb(MATFLOAT lab[3], MATFLOAT rgb[3], struct s_color_space *ptr_color_space)
+{    /* Lab -> XYZ -> linear RGB, limited to [0,1], then encoded in place in rgb[] */
+    MATFLOAT tristim[3];
+
+    cs_lab_to_xyz(lab, tristim, ptr_color_space->white_xyz);
+    mat_eval_3x3(ptr_color_space->mat_xyz2rgb, tristim, rgb);
+    (void)cs_clip_rgb(rgb, 0.0, 1.0);    /* the clip flag is not needed here */
+    cs_gamma_rgb(rgb, rgb, ptr_color_space->gamma_parm, EGD_LIN_2_NONLIN);
+}
+
+void cs_xyz_to_lab(MATFLOAT xyz[3], MATFLOAT lab[3], MATFLOAT white_xyz[3])
+{    /* XYZ relative to white_xyz -> L, a, b; CS_LAB_E and CS_LAB_K come from outside this function */
+    int nc;
+    MATFLOAT f[3], ft;
+
+    for (nc = 0; nc < 3; nc++) {
+        ft = xyz[nc] / white_xyz[nc];    /* a zero white component is not guarded */
+        f[nc] = (ft > CS_LAB_E) ? mat_pow(ft, 1.0 / 3.0) : (CS_LAB_K * ft + 16.0) / 116.0;    /* cube root or linear segment */
+    }
+
+    lab[0] = 116.0f * f[1] - 16.0;    /* note: float-suffixed literals here, unlike the rest of the file */
+    lab[1] = 500.0f * (f[0] - f[1]);
+    lab[2] = 200.0f * (f[1] - f[2]);
+}
+
+void cs_lab_to_xyz(MATFLOAT lab[3], MATFLOAT xyz[3], MATFLOAT white_xyz[3])
+{    /* L, a, b -> XYZ scaled by white_xyz; CS_LAB_E and CS_LAB_K come from outside this function */
+    int nc;
+    MATFLOAT f[3];
+    MATFLOAT ft = (lab[0] + 16.0) / 116.0;
+
+    f[0] = ft + lab[1] / 500.0;
+    f[1] = ft;    /* stored but not read again: Y below is computed from lab[0] directly */
+    f[2] = ft - lab[2] / 200.0;
+
+    xyz[0] = mat_pow(f[0], 3.0);    /* cube first, fall back to the linear segment if small */
+    if (xyz[0] <= CS_LAB_E)
+        xyz[0] = (116.0 * f[0] - 16.0) / CS_LAB_K;
+
+    if (lab[0] > CS_LAB_K * CS_LAB_E)    /* Y picks its segment from L itself */
+        xyz[1] = mat_pow((lab[0] + 16.0) / 116.0, 3.0);
+    else
+        xyz[1] = lab[0] / CS_LAB_K;
+
+    xyz[2] = mat_pow(f[2], 3.0);    /* same scheme as X */
+    if (xyz[2] <= CS_LAB_E)
+        xyz[2] = (116.0 * f[2] - 16.0) / CS_LAB_K;
+
+    for (nc = 0; nc < 3; nc++)
+        xyz[nc] *= white_xyz[nc];    /* back from white-relative to absolute XYZ */
+}
+
+MATFLOAT cs_de94(MATFLOAT lab0[3], MATFLOAT lab1[3])
+{    /* color difference between two Lab triplets; not symmetric, Sc and Sh use lab0's chroma only */
+    static const MATFLOAT Kc = 1.0;
+    static const MATFLOAT Kh = 1.0;
+    static const MATFLOAT Kl = 1.0;
+    static const MATFLOAT K1 = 0.045;
+    static const MATFLOAT K2 = 0.015;
+
+    MATFLOAT dL = lab0[0] - lab1[0];
+    MATFLOAT C1 = mat_sqrt(lab0[1] * lab0[1] + lab0[2] * lab0[2]);    /* chroma of lab0 */
+    MATFLOAT C2 = mat_sqrt(lab1[1] * lab1[1] + lab1[2] * lab1[2]);    /* chroma of lab1 */
+    MATFLOAT dC = C1 - C2;
+
+    MATFLOAT da = lab0[1] - lab1[1];
+    MATFLOAT db = lab0[2] - lab1[2];
+    MATFLOAT tmp = da * da + db * db - dC * dC;    /* squared hue difference */
+    MATFLOAT dH = (tmp > 0) ? mat_sqrt(tmp) : 0.0;    /* a non-positive tmp is treated as 0 */
+
+    MATFLOAT Sl = 1.0;
+    MATFLOAT Sc = 1.0 + K1 * C1;
+    MATFLOAT Sh = 1.0 + K2 * C1;
+
+    dL /= (Kl * Sl);
+    dC /= (Kc * Sc);
+    dH /= (Kh * Sh);
+
+    return mat_sqrt(dL * dL + dC * dC + dH * dH);
+}
+
+/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* luminance_amb in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - ambient light in linear space */
+MATFLOAT cs_gamma_adjust(MATFLOAT gamma, MATFLOAT luminance_peak, MATFLOAT luminance_amb)
+{
+    /* gamma correction for peak luminance of the display */
+    if (luminance_peak < 0.2)    /* additive form below 0.2, with no change at or below 0.1 */
+        gamma = gamma + ((luminance_peak > 0.1) ? 0.42 * mat_log10(luminance_peak / 0.1) : 0.0);
+    else    /* multiplicative form from 0.2 upwards */
+        gamma = gamma * mat_pow(1.111, mat_log2(luminance_peak / 0.1));
+    /* gamma correction for ambient light */
+    gamma = gamma - 0.076 * mat_log10(luminance_amb / 5.0);    /* NOTE(review): 5.0 reads as cd/m^2 while the header says normalized (5 nits = 0.0005, as in cs_ootf_gamma_amb) - confirm units */
+
+    return gamma;
+}
+
+/* BT.2100 */
+/* input must be in a range [0,1] normalized to [0,Lw]cd/m^2 in linear or non-linear space */
+/* output must be in a range [0,1] normalized to [0,Lw]cd/m^2 in linear or non-linear space */
+MATFLOAT cs_gamma_hlg(MATFLOAT val, enum cs_gamma_dir gamma_dir)
+{
+    static const MATFLOAT hlg_a = 0.17883277;
+    static const MATFLOAT hlg_b = 0.28466892;
+    static const MATFLOAT hlg_c = 0.55991073;
+    MATFLOAT res;
+
+    if (gamma_dir != EGD_LIN_2_NONLIN)    /* inverse: square law up to 0.5, exponential above */
+        res = (val <= 0.5) ? val * val / 3.0 : (mat_exp((val - hlg_c) / hlg_a) + hlg_b) / 12.0;
+    else    /* forward: square root up to 1/12, logarithm above */
+        res = (val <= (1.0 / 12.0)) ? mat_sqrt(3.0 * val) : hlg_a * mat_log(12.0 * val - hlg_b) + hlg_c;
+
+    /* the result is always limited to [0, 1] */
+    return MAT_CLAMP(res, 0.0, 1.0);
+}
+
+/* HLG OOTF */
+/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */
+/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+void cs_hlg_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma)
+{    /* output may be the same as input; ys <= 0 now yields 0 instead of a possible 0 * inf = NaN */
+    MATFLOAT ys = 0.2627 * rgb_inp[0] + 0.6780 * rgb_inp[1] + 0.0593 * rgb_inp[2];
+    MATFLOAT scale = (ys > 0.0) ? mat_pow(ys, system_gamma - 1.0) : 0.0;    /* guards system_gamma < 1 at black */
+    int nc;
+
+    for (nc = 0; nc < 3; nc++) {
+        rgb_out[nc] = rgb_inp[nc] * scale * luminance_peak;
+        rgb_out[nc] = MAT_CLAMP(rgb_out[nc], 0.0, 1.0);
+    }
+}
+
+/* HLG OOTF_INV */
+/* rgb_inp[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */
+/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */
+/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+void cs_hlg_ootf_inv(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma)
+{    /* output may be the same as input; black input or a non-positive peak yields black instead of Inf/NaN */
+    MATFLOAT yd = (luminance_peak > 0.0) ? (0.2627 * rgb_inp[0] + 0.6780 * rgb_inp[1] + 0.0593 * rgb_inp[2]) / luminance_peak : 0.0;
+    MATFLOAT scale = (yd > 0.0) ? mat_pow(yd, (1.0 - system_gamma) / system_gamma) / luminance_peak : 0.0;    /* exponent is negative for gamma > 1 */
+    int nc;
+
+    for (nc = 0; nc < 3; nc++) {
+        rgb_out[nc] = rgb_inp[nc] * scale;
+        rgb_out[nc] = MAT_CLAMP(rgb_out[nc], 0.0, 1.0);
+    }
+}
+
+/* HLG OETF */
+/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */
+/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */
+/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+void cs_hlg_oetf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma)
+{    /* output may be the same as input: inverse OOTF first, then the HLG curve per channel */
+    int ch;
+
+    cs_hlg_ootf_inv(rgb_inp, rgb_out, luminance_peak, system_gamma);
+    for (ch = 3; ch-- > 0; )    /* channels are independent, so the visiting order does not matter */
+        rgb_out[ch] = cs_gamma_hlg(rgb_out[ch], EGD_LIN_2_NONLIN);
+}
+
+/* HLG EOTF */
+/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in linear space */
+/* luminance_limits[] in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw; only luminance_limits[1] is read */
+/* system_gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+/* beta - user black level lift (= 0.0) */
+void cs_hlg_eotf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_limits[3],
+    MATFLOAT system_gamma, MATFLOAT beta)
+{    /* output may be the same as input */
+    int ch;
+
+    for (ch = 0; ch < 3; ch++) {
+        MATFLOAT lifted = MAT_MAX((1.0 - beta) * rgb_inp[ch] + beta, 0.0);    /* black level lift, floored at 0 */
+        rgb_out[ch] = cs_gamma_hlg(lifted, EGD_NONLIN_2_LIN);
+    }
+    cs_hlg_ootf(rgb_out, rgb_out, luminance_limits[1], system_gamma);    /* luminance_limits[1] is passed as the peak */
+}
+
+/* HLG system gamma calculation */
+/* peak_luminance - Lw */
+MATFLOAT cs_hlg_system_gamma(MATFLOAT peak_luminance)
+{
+    /* peak relative to 1000 / CS_MAX_LUMINANCE */
+    MATFLOAT norm_peak = peak_luminance / (1000.0 / CS_MAX_LUMINANCE);
+
+    /* within [400,2000] / CS_MAX_LUMINANCE: logarithmic form */
+    if (!((peak_luminance < 400.0 / CS_MAX_LUMINANCE) || (peak_luminance > 2000.0 / CS_MAX_LUMINANCE)))
+        return 1.2 + 0.42 * mat_log10(norm_peak);
+
+    /* outside that interval: power form */
+    return 1.2 * mat_pow(1.111, mat_log2(norm_peak));
+}
+
+#if 0
+/* PQ to HLG Transcode (compiled out by the enclosing #if 0) */
+/* rgb_inp[] in a range [0,1] normalized to [0,10000]cd/m^2 in non-linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */
+/* luminance_peak in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */
+/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+void cs_pq_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT gamma)
+{
+    MATFLOAT rgb_lin[3];
+    int nc;
+
+    for (nc = 0; nc < 3; nc++)
+        rgb_lin[nc] = cs_gamma_pq(rgb_inp[nc], EGD_NONLIN_2_LIN);    /* PQ to Linear [0,10000]->[0,10000] */
+
+    cs_hlg_ootf_inv(rgb_lin, rgb_lin, luminance_peak, gamma);    /* OOTF-1 - [0,10000]->[0,Lw] */
+    cs_hlg_oetf(rgb_lin, rgb_out, luminance_peak, gamma);    /* Linear to HLG - [0,Lw]->[0,Lw]; NOTE(review): cs_hlg_oetf() itself calls cs_hlg_ootf_inv(), so the inverse OOTF runs twice - confirm before enabling */
+}
+
+/* HLG to PQ Transcode (compiled out by the enclosing #if 0) */
+/* rgb_inp[] in a range [0,1] normalized to [0,Lw]cd/m^2 in non-linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,10000]cd/m^2 in non-linear space */
+/* vec_luminance in a range [0,1] normalized to [0,10000]cd/m^2 in linear space - mastering Lb and Lw */
+/* gamma = 1.2 - for reference display (1000 cd/m^2) and reference ambient light (5 cd/m^2) */
+void cs_hlg_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT vec_luminance[3], MATFLOAT gamma)
+{    /* NOTE(review): the prototype in cs_funcs.h lists an extra beta parameter that this definition lacks */
+    MATFLOAT rgb_lin[3];
+    int nc;
+
+    cs_hlg_eotf(rgb_inp, rgb_lin, vec_luminance, gamma);    /* HLG to Linear - [0,Lw]->[0,Lw]; NOTE(review): cs_hlg_eotf() takes five arguments (beta is missing here) */
+    cs_hlg_ootf(rgb_lin, rgb_lin, vec_luminance[1], gamma);    /* OOTF - [0,Lw]->[0,10000]; NOTE(review): cs_hlg_eotf() already ends with cs_hlg_ootf() - confirm */
+
+    for (nc = 0; nc < 3; nc++)
+        rgb_out[nc] = cs_gamma_pq(rgb_lin[nc], EGD_LIN_2_NONLIN);    /* Linear to PQ [0,10000]->[0,10000] */
+}
+
+/* SDR to HLG conversion, BT.2390 display referred simplified (compiled out by the enclosing #if 0) */
+/* rgb_inp[] in a range [0,1] normalized to [0,100]cd/m^2 in non-linear space */
+/* rgb_out[] in a range [0,1] normalized to [0,1000]cd/m^2 in non-linear space */
+void cs_sdr_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020)
+{
+    MATFLOAT sdr_lb = 0.0;
+    MATFLOAT sdr_lw = 100.0 / 10000.0;
+    MATFLOAT sdr_gamma = 2.4;
+    MATFLOAT scale = 0.2546; /* 0.75HLG = 392cd/m^2 */
+    MATFLOAT hlg_lw = 1000.0 / 10000.0;
+    MATFLOAT hlg_amb = 5.0 / 10000.0;
+    MATFLOAT hlg_gamma = cs_gamma_adjust(1.2, hlg_lw, hlg_amb); /* NOTE(review): cs_gamma_adjust() divides luminance_amb by 5.0, so this normalized value gives a non-zero ambient term - confirm */
+    MATFLOAT gamma = 1.03;
+    MATFLOAT rgb_lin[3];
+    int nc;
+
+    for (nc = 0; nc < 3; nc++) {
+        rgb_lin[nc] = cs_gamma_1886(rgb_inp[nc], sdr_lb, sdr_lw, sdr_gamma); /* [0,10000]cd/m^2 */
+        rgb_lin[nc] = rgb_lin[nc] / sdr_lw; /* [0,sdr_lw]cd/m^2 */
+        rgb_lin[nc] = MAT_CLAMP(rgb_lin[nc], 0.0, 1.0);
+    }
+
+    if (en_709_2020) { /* MATFLOAT used as a flag: any non-zero value applies cs_mat_709_2020 */
+        MATFLOAT rgb_tmp[3];
+
+        mat_copy(rgb_lin, rgb_tmp, 3);
+        mat_eval_3x3(cs_mat_709_2020, rgb_tmp, rgb_lin); /* [0,sdr_lw]cd/m^2 */
+    }
+
+    for (nc = 0; nc < 3; nc++) {
+        rgb_lin[nc] = rgb_lin[nc] * scale; /* scale to 392cd/m^2 [0,hlg_lw] */
+        rgb_lin[nc] = mat_pow(rgb_lin[nc], 1.0 / gamma); /* [0,hlg_lw] */
+    }
+
+    cs_hlg_oetf(rgb_lin, rgb_out, hlg_lw, hlg_gamma); /* Linear to HLG - [0,hlg_lw]cd/m^2->[0,hlg_lw]cd/m^2 */
+}
+#endif
diff --git a/src/amd/gmlib/gm/cs_funcs.h b/src/amd/gmlib/gm/cs_funcs.h
new file mode 100755
index 00000000000..2565087d806
--- /dev/null
+++ b/src/amd/gmlib/gm/cs_funcs.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : cs_funcs.h
+ * Purpose    : Color Space functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : September 20, 2023
+ * Version    : 1.4
+ *-------------------------------------------------------------------------
+ *
+ */
+
+#pragma once
+
+#include "mat_funcs.h"
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#define CS_MAX_LUMINANCE 10000.0
+#define CS_SCALE_CCCS    125.0
+#define CS_CHAD_D65    0x01    /* apply chromatic adaptation */
+
+static MATFLOAT cs_mat_709_2020[3][3] = { /* BT.2087; NOTE(review): non-const static defined in a header, so every file including it gets a private copy - consider moving it to cs_funcs.c */
+    {0.6274, 0.3293, 0.0433},
+    {0.0691, 0.9195, 0.0114},
+    {0.0164, 0.0880, 0.8956}
+};
+
+enum cs_white_point_type {    /* white point identifiers; argument type of cs_get_white_point() */
+    EWPT_NONE = 0,        /* NATIVE */
+    EWPT_A = 1,
+    EWPT_B = 2,
+    EWPT_C = 3,
+    EWPT_D50 = 4,
+    EWPT_D55 = 5,
+    EWPT_D65 = 6,        /* 709, sRGB, ADOBE, APPLE */
+    EWPT_D75 = 7,
+    EWPT_9300 = 8,
+    EWPT_E = 9,
+    EWPT_F2 = 10,
+    EWPT_F7 = 11,
+    EWPT_F11 = 12,
+    EWPT_DCIP3 = 13,    /* DCI-P3 */
+    EWPT_11000 = 14,    /* 11000K */
+    EWPT_NUM = 15        /* CUSTOM */
+};
+
+enum cs_gamma_type {    /* gamma curve identifiers; argument type of cs_get_gamma() */
+    EGT_LINEAR = 0,        /* LINEAR    */
+    EGT_709 = 1,        /* 709 (SD/HD)    */
+    EGT_ADOBE = 2,        /* ADOBE 1998    */
+    EGT_DCIP3 = 3,        /* DCI-P3    */
+    EGT_APPLE = 4,        /* APPLE    */
+    EGT_sRGB = 5,        /* sRGB        */
+    EGT_PQ = 6,        /* PQ        */
+    EGT_HLG = 7,        /* HLG        */
+    EGT_2_2 = 8,        /* 2.2        */
+    EGT_2_4 = 9,        /* 2.4        */
+    EGT_CUSTOM = 10        /* CUSTOM    */
+};
+
+enum cs_color_space_type {    /* color space identifiers; argument type of cs_get_color_space() */
+    ECST_709 = 0,        /* 709(HD),sRGB */
+    ECST_SMPTE = 1,        /* SMPTE RP125 (SD) */
+    ECST_ADOBE = 2,        /* ADOBE 1998    */
+    ECST_DCIP3 = 3,        /* DCI-P3    */
+    ECST_APPLE = 4,        /* APPLE    */
+    ECST_EBU = 5,        /* EBU 3213 (576i) */
+    ECST_NTSC = 6,        /* NTSC 1953    */
+    ECST_CIE = 7,        /* CIE        */
+    ECST_BT2020 = 8,    /* BT.2020    */
+    ECST_CUSTOM = 9        /* CUSTOM    */
+};
+
+enum cs_gamma_dir {    /* direction selector for the cs_gamma*() curve functions */
+    EGD_NONLIN_2_LIN = 0,    /* non-linear signal to linear */
+    EGD_LIN_2_NONLIN = 1    /* linear signal to non-linear */
+};
+
+struct s_cs_opts {    /* color space options; first argument of cs_init(), reset by cs_set_opts_def() */
+    /* Color Space Type: [0,9]=0 : 0-709, 1-SMPTE, 2-ADOBE1998, 3-DCI-P3, 4-APPLE,
+        5-EBU3213, 6-NTSC, 7-CIE, 8-BT2020, 9-CUSTOM */
+    enum cs_color_space_type    color_space_type;
+    /* Gamma Type: [0,10]=1 : 0-LINEAR, 1-709, 2-ADOBE, 3-DCI-P3, 4-APPLE,
+        5-sRGB, 6-PQ, 7-HLG, 8-G2.2, 9-G2.4, 10-CUSTOM */
+    enum cs_gamma_type    gamma_type;
+    MATFLOAT    luminance_limits[2];    /* luminance min/max in a range [0.0,10000.0]= {0.0,400.0} */
+    MATFLOAT    pq_norm;    /* normalization luminance for PQ: [0.0,10000.0] = 0.0 - no normalization */
+    unsigned int    mode;        /* mode: {0,1}=0 : Enable/disable Chromatic adaptation */
+    MATFLOAT    rgbw_xy[8];    /* Chromaticity: Red, Green, Blue, White in xy */
+    MATFLOAT    gamma_parm[4];    /* Gamma parameters: (0.0,?,?,?) - PQ, (0.5,?,?,?) - HLG  */
+};
+
+struct s_color_space {    /* color space state; second argument of cs_init() */
+    /* input parameters */
+    /* cs_color_space_type: [0,9]=9 : 0-709, 1-SMPTE, 2-ADOBE1998, 3-DCI-P3, 4-APPLE,
+        5-EBU3213, 6-NTSC, 7-CIE, 8-BT2020, 9-CUSTOM */
+    enum cs_color_space_type    color_space_type;
+    /* cs_gamma_type: [0,10]=9 : 0-LINEAR, 1-709, 2-ADOBE, 3-DCI-P3, 4-APPLE,
+        5-sRGB, 6-PQ, 7-HLG, 8-Gamma2.2, 9-Gamma2.4, 10-CUSTOM (NOTE(review): stated default 9 is now Gamma2.4, not CUSTOM - confirm) */
+    enum cs_gamma_type        gamma_type;
+    /* luminances min/max/range normalized to 10000.0 in a range [0.0,1.0]=0.0,1.0,1.0 */
+    MATFLOAT    luminance_limits[3];
+    MATFLOAT    pq_norm;    /* normalization luminance for PQ: [0.0,10000.0] = 0.0 - no normalization */
+    unsigned int    mode;            /* mode: {0,1}=0 : CS_CHAD_D65 - Enable Chromatic Adaptation */
+    /* custom or initialized parameters based on input parameters */
+    MATFLOAT    rgbw_xy[8];        /* Red, Green, Blue, White in xy */
+    MATFLOAT    gamma_parm[4];        /* Gamma parameters: 0.0,?,?,? - PQ, 0.5,?,?,? - HLG */
+    /* calculated variables */
+    MATFLOAT    luma_limits[3];        /* Min/max/range luma (PQ) normalized to 10000 : [0.0,1.0]=0,1,1 */
+    MATFLOAT    mat_rgb2xyz[3][3];    /* RGB to XYZ matrix */
+    MATFLOAT    mat_xyz2rgb[3][3];    /* XYZ to RGB matrix */
+    MATFLOAT    mat_rgb2lms[3][3];    /* RGB to LMS matrix */
+    MATFLOAT    mat_lms2rgb[3][3];    /* LMS to RGB matrix */
+    MATFLOAT    mat_lms2itp[3][3];    /* LMS to ITP matrix */
+    MATFLOAT    mat_itp2lms[3][3];    /* ITP to LMS matrix */
+    MATFLOAT    mat_chad[3][3];        /* Chromatic Adaptation matrix */
+    MATFLOAT    white_xyz[3];        /* White in XYZ */
+    int        cct;            /* Correlated Color Temperature */
+    MATFLOAT    hlg_system_gamma;    /* HLG OOTF system gamma */
+    MATFLOAT    hlg_beta;        /* user black level lift */
+};
+
+/* get internal constants */
+const MATFLOAT *cs_get_gamma(enum cs_gamma_type gamma_type);
+const MATFLOAT *cs_get_color_space(enum cs_color_space_type color_space_type);
+const MATFLOAT *cs_get_white_point(enum cs_white_point_type white_point_type);
+
+/* initialize color space functions */
+void cs_set_opts_def(struct s_cs_opts *ptr_cs_opts);
+void cs_init(struct s_cs_opts *ptr_cs_opts, struct s_color_space *ptr_color_space);
+void cs_init_private(struct s_color_space *ptr_color_space);
+void cs_copy(struct s_color_space *ptr_color_space_src, struct s_color_space *ptr_color_space_dst);
+void cs_luminance_to_luma_limits(MATFLOAT luminance_limits[2], MATFLOAT luma_limits[3]);
+
+/* color formats conversion functions */
+void cs_xyy_to_xyz(MATFLOAT xyy_inp[3], MATFLOAT xyz_out[3]);
+void cs_xyz_to_xyy(MATFLOAT xyz_inp[3], MATFLOAT xyy_out[3]);
+
+void cs_xyzc_to_xyz(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3]);
+void cs_xyz_to_xyzc(MATFLOAT xyz_inp[3], MATFLOAT xyz_out[3]);
+
+void cs_rgb_to_itp(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3]);
+void cs_itp_to_rgb(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3]);
+
+void cs_ich_to_itp(MATFLOAT ich_inp[3], MATFLOAT itp_out[3]);
+void cs_itp_to_ich(MATFLOAT itp_inp[3], MATFLOAT ich_out[3]);
+
+void cs_rgb_to_yuv(MATFLOAT rgb_inp[3], MATFLOAT yuv_out[3]);
+void cs_yuv_to_rgb(MATFLOAT yuv_inp[3], MATFLOAT rgb_out[3]);
+
+MATFLOAT cs_nlin_to_lin(struct s_color_space *ptr_color_space, MATFLOAT val_inp);
+void cs_nlin_to_lin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+
+MATFLOAT cs_lin_to_nlin(struct s_color_space *ptr_color_space, MATFLOAT val_inp);
+void cs_lin_to_nlin_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+
+/* internal matrices generation functions */
+int cs_genmat_rgb_to_xyz(MATFLOAT rgbw_xy[8], MATFLOAT mat_rgb2xyz[3][3]);
+int cs_genmat_xyz_to_rgb(MATFLOAT rgbw_xy[8], MATFLOAT mat_xyz2rgb[3][3]);
+int cs_genmat_rgb_to_rgb(MATFLOAT rgbw_xy_src[8], MATFLOAT rgbw_xy_dst[8], MATFLOAT mat_rgb2rgb[3][3], int en_chad);
+int cs_genmat_chad(MATFLOAT white_xy_src[2], MATFLOAT white_xy_dst[2], MATFLOAT mat_chad[3][3]);
+
+/* gamma curves generation functions */
+MATFLOAT cs_gamma(MATFLOAT val, MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir);
+MATFLOAT cs_gamma_pq(MATFLOAT val, enum cs_gamma_dir gamma_dir);
+MATFLOAT cs_gamma_1886(MATFLOAT val, MATFLOAT lb, MATFLOAT lw, MATFLOAT gamma);
+
+void cs_pq_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+
+void cs_sdr_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020);
+
+void cs_gamma_rgb(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT gamma_parm[4], enum cs_gamma_dir gamma_dir);
+
+/* signal clipping functions */
+int cs_min_rgb(MATFLOAT rgb[3], MATFLOAT val_min);
+int cs_max_rgb(MATFLOAT rgb[3], MATFLOAT val_max);
+
+/* signal validation functions */
+int cs_is_valid_itp(struct s_color_space *ptr_color_space, MATFLOAT itp[3]);
+int cs_is_valid_ic(struct s_color_space *ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2]);
+int cs_is_valid_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max);
+int cs_clip_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max);
+void cs_clamp_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_max);
+
+/* signal normalization functions */
+void cs_norm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng);
+void cs_denorm_rgb(MATFLOAT rgb[3], MATFLOAT val_min, MATFLOAT val_rng);
+
+/* signal format conversion functions */
+void cs_int2flt_rgb(int rgb_inp[3], MATFLOAT rgb_out[3], int val_max);
+void cs_flt2int_rgb(MATFLOAT rgb_inp[3], int rgb_out[3], int val_max);
+void cs_short2flt_rgb(unsigned short rgb_inp[3], MATFLOAT rgb_out[3], int val_max);
+void cs_flt2short_rgb(MATFLOAT rgb_inp[3], unsigned short rgb_out[3], int val_max);
+
+void cs_genprim_itp(struct s_color_space *ptr_color_space,
+    int num_prim, MATFLOAT *ptr_prim_rgb, MATFLOAT *ptr_prim_ich);
+
+/* gamma curve handling functions */
+MATFLOAT cs_soft_clip(MATFLOAT val, MATFLOAT limits_src[3], MATFLOAT limits_dst[3]);
+MATFLOAT cs_gamma_to_gamma(MATFLOAT val, enum cs_gamma_type gamma_type_src, enum cs_gamma_type gamma_type_dst,
+    MATFLOAT luminance_limits_dst[3], MATFLOAT luma_limits_src[3], MATFLOAT luma_limits_dst[3],
+    MATFLOAT(*func_pq_to_pq)(MATFLOAT), int en_norm, int en_soft_clip);
+
+/* CCT handling functions */
+#define CS_CCT_MIN 1000
+#define CS_CCT_MAX 20000
+#define CS_CCT_INC 100
+#define CS_CCT_SIZE ((CS_CCT_MAX - CS_CCT_MIN) / CS_CCT_INC + 1)
+
+int cs_xy_to_cct(MATFLOAT white_xy[2]);
+void cs_cct_to_xy(int cct, MATFLOAT xy[2]);
+void cs_csc(struct s_color_space *ptr_cs_src, struct s_color_space *ptr_cs_dst,
+    MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], int en_chad);
+int cs_is_space(struct s_color_space *ptr_color_space,
+    enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type);
+
+void cs_init_type(MATFLOAT luminance_limits[2],
+    enum cs_color_space_type color_space_type, enum cs_gamma_type gamma_type,
+    struct s_color_space *ptr_color_space);
+void cs_init_BT709(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space);
+void cs_init_BT2100(MATFLOAT luminance_limits[2], struct s_color_space *ptr_color_space);
+void cs_rgb_to_ycbcr2020(MATFLOAT rgb_inp[3], MATFLOAT ycbcr_out[3]);
+
+MATFLOAT cs_ootf_gamma_peak(MATFLOAT gamma, MATFLOAT luminance_peak);
+MATFLOAT cs_ootf_gamma_amb(MATFLOAT gamma, MATFLOAT luminance_ambient);
+MATFLOAT cs_gamma_adjust_sdr(MATFLOAT gamma, MATFLOAT luminance_peak);
+MATFLOAT cs_gamma_adjust(MATFLOAT gamma, MATFLOAT luminance_peak, MATFLOAT luminance_amb);
+
+void cs_chad_gains(MATFLOAT rgbw_xy[8], MATFLOAT w_xy[2], MATFLOAT rgb_gain[3]);
+void cs_genmat_cct(struct s_color_space *ptr_cs, int cct_shift, int norm, MATFLOAT mat_cct[3][3]);
+
+/* HSV functions */
+int cs_rgb_to_vsh(MATFLOAT rgb[3], MATFLOAT vsh[3]);
+void cs_vsh_to_rgb(MATFLOAT vsh[3], MATFLOAT rgb[3]);
+
+/* YUV functions */
+void cs_yuv_to_ysh(MATFLOAT yuv_inp[3], MATFLOAT ysh_out[3]);
+void cs_ysh_to_yuv(MATFLOAT ysh_inp[3], MATFLOAT yuv_out[3]);
+
+/* CIELAB functions */
+#define CS_LAB_E 0.008856
+#define CS_LAB_K 903.3
+
+void cs_rgb_to_lab(MATFLOAT rgb[3], MATFLOAT lab[3], struct s_color_space *ptr_color_space);
+void cs_lab_to_rgb(MATFLOAT lab[3], MATFLOAT rgb[3], struct s_color_space *ptr_color_space);
+void cs_xyz_to_lab(MATFLOAT xyz[3], MATFLOAT lab[3], MATFLOAT white_xyz[3]);
+void cs_lab_to_xyz(MATFLOAT lab[3], MATFLOAT xyz[3], MATFLOAT white_xyz[3]);
+MATFLOAT cs_de94(MATFLOAT lab0[3], MATFLOAT lab1[3]);
+
+/* HLG functions: transfer curve, OOTF and its inverse, OETF, EOTF and system gamma (parameter names match cs_funcs.c) */
+MATFLOAT cs_gamma_hlg(MATFLOAT val, enum cs_gamma_dir gamma_dir);
+void cs_hlg_ootf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma);
+void cs_hlg_ootf_inv(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma);
+void cs_hlg_oetf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma);
+void cs_hlg_eotf(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_limits[3],
+    MATFLOAT system_gamma, MATFLOAT beta);
+MATFLOAT cs_hlg_system_gamma(MATFLOAT peak_luminance);
+
+#if 0
+void cs_pq_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT luminance_peak, MATFLOAT system_gamma);
+void cs_hlg_to_pq(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT vec_luminance[3],
+    MATFLOAT system_gamma, MATFLOAT beta);
+void cs_sdr_to_hlg(MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3], MATFLOAT en_709_2020);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/csc_api_funcs.c b/src/amd/gmlib/gm/csc_api_funcs.c
new file mode 100755
index 00000000000..e76a6574620
--- /dev/null
+++ b/src/amd/gmlib/gm/csc_api_funcs.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : csc_api_funcs.c
+ * Purpose    : Color Space Conversion 3DLUT functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : June 09, 2023
+ * Version    : 1.2
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "csc_api_funcs.h"
+
+void csc_api_set_def(struct s_csc_api_opts *ptr_csc_api_opts)
+{
+    /* 3DLUT defaults: 17 points per axis, 12 bits per entry, merge off, no buffer attached */
+    ptr_csc_api_opts->ptr_3dlut_rgb = 0;
+    ptr_csc_api_opts->bitwidth_3dlut = 12;
+    ptr_csc_api_opts->num_pnts_3dlut = 17;
+    ptr_csc_api_opts->en_merge_3dlut = 0;
+    /* chromatic adaptation off; both color spaces take their cs_set_opts_def() defaults */
+    ptr_csc_api_opts->en_chad = 0;
+    cs_set_opts_def(&ptr_csc_api_opts->cs_opts_src);
+    cs_set_opts_def(&ptr_csc_api_opts->cs_opts_dst);
+}
+
+int csc_api_gen_map(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map)
+{    /* copy the options into the map, initialize both color spaces, then let csc_init_map() build the matrix */
+    struct s_color_space *ptr_src = &ptr_csc_map->color_space_src;
+    struct s_color_space *ptr_dst = &ptr_csc_map->color_space_dst;
+
+    ptr_csc_map->en_chad = ptr_csc_api_opts->en_chad;
+    cs_init(&ptr_csc_api_opts->cs_opts_src, ptr_src);
+    cs_init(&ptr_csc_api_opts->cs_opts_dst, ptr_dst);
+    return csc_init_map(ptr_csc_map);
+}
+
+int csc_api_gen_3dlut(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map)
+{    /* fills (or, with en_merge_3dlut, converts in place) num_pnts^3 RGB triples; returns 0, or -1 on invalid options */
+    unsigned short *ptr_lut = ptr_csc_api_opts->ptr_3dlut_rgb;
+    int num_pnts = ptr_csc_api_opts->num_pnts_3dlut;
+    int en_merge = ptr_csc_api_opts->en_merge_3dlut;
+    int bitwidth = ptr_csc_api_opts->bitwidth_3dlut;
+    int value_max, nir, nig, nib;
+
+    if (ptr_lut == 0)
+        return -1;    /* no LUT buffer attached */
+    if ((bitwidth < 1) || (bitwidth > 16))
+        return -1;    /* entries are unsigned short; also keeps the shift below well defined */
+    if (!en_merge && (num_pnts == 1))
+        return -1;    /* the grid step would divide by (num_pnts - 1) == 0 */
+
+    value_max = (1 << bitwidth) - 1;
+    for (nir = 0; nir < num_pnts; nir++)
+        for (nig = 0; nig < num_pnts; nig++)
+            for (nib = 0; nib < num_pnts; nib++, ptr_lut += 3) {
+                unsigned short rgb[3];
+                MATFLOAT rgb_inp[3], rgb_out[3];
+                /* input is either the existing LUT entry (merge) or the regular grid node */
+                rgb[0] = en_merge ? ptr_lut[0] : (nir * value_max) / (num_pnts - 1);
+                rgb[1] = en_merge ? ptr_lut[1] : (nig * value_max) / (num_pnts - 1);
+                rgb[2] = en_merge ? ptr_lut[2] : (nib * value_max) / (num_pnts - 1);
+                cs_short2flt_rgb(rgb, rgb_inp, value_max);
+                csc_rgb_to_rgb(ptr_csc_map, rgb_inp, rgb_out);
+                cs_flt2short_rgb(rgb_out, ptr_lut, value_max);
+            }
+    return 0; }
diff --git a/src/amd/gmlib/gm/csc_api_funcs.h b/src/amd/gmlib/gm/csc_api_funcs.h
new file mode 100755
index 00000000000..522b16b0477
--- /dev/null
+++ b/src/amd/gmlib/gm/csc_api_funcs.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : csc_api_funcs.h
+ * Purpose    : Color Space Conversion 3DLUT functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : June 09, 2023
+ * Version    : 1.2
+ *----------------------------------------------------------------------
+ *
+ */
+
+#pragma once
+
+#include "csc_funcs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct s_csc_api_opts { /* csc parameters; defaults are set by csc_api_set_def() */
+    int    en_chad;    /* enable/disable chromatic adaptation: {0,1}=0 */
+    struct s_cs_opts    cs_opts_src;    /* Source color space */
+    struct s_cs_opts    cs_opts_dst;    /* Destination color space */
+    /* 3DLUT parameters */
+    int        en_merge_3dlut;    /* non-zero: csc_api_gen_3dlut() reads each existing LUT entry as input and overwrites it; default 0 */
+    int        num_pnts_3dlut;    /* points per axis, the LUT holds num_pnts^3 RGB triples; default 17 */
+    int        bitwidth_3dlut;    /* bits per entry, maximum code is (1 << bitwidth) - 1; default 12 */
+    unsigned short    *ptr_3dlut_rgb;    /* LUT buffer, default 0; csc_api_gen_3dlut() returns -1 while it is 0 */
+};
+
+void csc_api_set_def(struct s_csc_api_opts *ptr_csc_api_opts);
+
+int csc_api_gen_map(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map);
+int csc_api_gen_3dlut(struct s_csc_api_opts *ptr_csc_api_opts, struct s_csc_map *ptr_csc_map);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/csc_funcs.c b/src/amd/gmlib/gm/csc_funcs.c
new file mode 100755
index 00000000000..82e3310ba6f
--- /dev/null
+++ b/src/amd/gmlib/gm/csc_funcs.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : csc_funcs.c
+ * Purpose    : Color Space Conversion 3DLUT functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : June 09, 2023
+ * Version    : 1.2
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "csc_funcs.h"
+
+void csc_ctor(struct s_csc_map *ptr_csc_map)
+{    /* construction only applies the defaults from csc_set_def() */
+    csc_set_def(ptr_csc_map);
+}
+
+void csc_dtor(struct s_csc_map *ptr_csc_map)
+{    /* intentionally empty: struct s_csc_map holds no pointers, so there is nothing to release */
+}
+
+void csc_set_def(struct s_csc_map *ptr_csc_map)
+{    /* defaults: conversion matrix reset through mat_3x3_unity(), chromatic adaptation disabled */
+    mat_3x3_unity(ptr_csc_map->mat_csc);
+    ptr_csc_map->en_chad = 0;
+}
+
+int csc_init_map(struct s_csc_map *ptr_csc_map)
+{
+    /* builds mat_csc from the source and destination chromaticities; NOTE(review): the int result of cs_genmat_rgb_to_rgb() is discarded and 0 is always returned - confirm */
+    cs_genmat_rgb_to_rgb(ptr_csc_map->color_space_src.rgbw_xy, ptr_csc_map->color_space_dst.rgbw_xy,
+        ptr_csc_map->mat_csc, ptr_csc_map->en_chad);
+    return 0;
+}
+
+int csc_rgb_to_rgb(struct s_csc_map *ptr_csc_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{
+    MATFLOAT rgb_lin[3];    /* input after the source space's non-linear to linear step */
+
+    /* linearize with the source space, apply mat_csc, clamp to [0,1], re-encode with the destination space */
+    cs_nlin_to_lin_rgb(&ptr_csc_map->color_space_src, rgb_inp, rgb_lin);
+    mat_eval_3x3(ptr_csc_map->mat_csc, rgb_lin, rgb_out);
+    cs_clamp_rgb(rgb_out, 0.0, 1.0);
+    cs_lin_to_nlin_rgb(&ptr_csc_map->color_space_dst, rgb_out, rgb_out);
+    return 0;    /* no failure path */
+}
diff --git a/src/amd/gmlib/gm/csc_funcs.h b/src/amd/gmlib/gm/csc_funcs.h
new file mode 100755
index 00000000000..0ea4e4b2a65
--- /dev/null
+++ b/src/amd/gmlib/gm/csc_funcs.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : csc_funcs.h
+ * Purpose    : Color Space Conversion 3DLUT functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : June 09, 2023
+ * Version    : 1.2
+ *----------------------------------------------------------------------
+ *
+ */
+
+#pragma once
+
+#include "cs_funcs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct s_csc_map {    /* state used by csc_rgb_to_rgb(); defaults come from csc_set_def() */
+    int    en_chad;    /* enable/disable chromatic adaptation: {0,1}=0 */
+    struct s_color_space    color_space_src;    /* Source color space */
+    struct s_color_space    color_space_dst;    /* Destination color space */
+    MATFLOAT mat_csc[3][3];    /* color space conversion matrix, filled by csc_init_map() */
+};
+
+/* constructor and destructor */
+void csc_ctor(struct s_csc_map *ptr_csc_map);
+void csc_dtor(struct s_csc_map *ptr_csc_map);
+
+void csc_set_def(struct s_csc_map *ptr_csc_map);
+int csc_init_map(struct s_csc_map *ptr_csc_map);
+
+int csc_rgb_to_rgb(struct s_csc_map *ptr_csc_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/cvd_api_funcs.c b/src/amd/gmlib/gm/cvd_api_funcs.c
new file mode 100755
index 00000000000..01de0600135
--- /dev/null
+++ b/src/amd/gmlib/gm/cvd_api_funcs.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : cvd_api_funcs.c
+ * Purpose    : Color Vision Deficiency functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : January 21, 2020
+ * Version    : 1.0
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "cvd_api_funcs.h"
+
+void cvd_api_set_def(struct s_cvd_api_opts *ptr_api_cvd_opts)
+{
+    /* mode ECM_NONE and zero gain in all three entries */
+    ptr_api_cvd_opts->mode = ECM_NONE;
+    ptr_api_cvd_opts->gain[0] = 0.0;
+    ptr_api_cvd_opts->gain[1] = 0.0;
+    ptr_api_cvd_opts->gain[2] = 0.0;
+
+    cs_set_opts_def(&ptr_api_cvd_opts->cs_opts);
+    /* 3DLUT defaults: merge off, 17 points per axis, 12 bits per entry, no buffer attached */
+    ptr_api_cvd_opts->en_merge_3dlut = 0;
+    ptr_api_cvd_opts->num_pnts_3dlut = 17;
+    ptr_api_cvd_opts->bitwidth_3dlut = 12;
+    ptr_api_cvd_opts->ptr_3dlut_rgb = 0;
+}
+
+int cvd_api_gen_map(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map)
+{
+    int idx;
+
+    /* start from the defaults, then take gains and mode from the API options */
+    cvd_set_def(ptr_cvd_map);
+    for (idx = 0; idx < 3; idx++)
+        ptr_cvd_map->gain[idx] = ptr_api_cvd_opts->gain[idx];
+    ptr_cvd_map->mode = ptr_api_cvd_opts->mode;
+
+    cs_init(&ptr_api_cvd_opts->cs_opts, &ptr_cvd_map->color_space);
+
+    return 0;    /* no failure path */
+}
+
+int cvd_api_gen_3dlut(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map)
+{    /* fills (or, with en_merge_3dlut, converts in place) num_pnts^3 RGB triples; returns 0, or -1 on invalid options */
+    unsigned short *ptr_lut = ptr_api_cvd_opts->ptr_3dlut_rgb;
+    int num_pnts = ptr_api_cvd_opts->num_pnts_3dlut;
+    int en_merge = ptr_api_cvd_opts->en_merge_3dlut;
+    int bitwidth = ptr_api_cvd_opts->bitwidth_3dlut;
+    int value_max, nir, nig, nib;
+
+    if (ptr_lut == 0)
+        return -1;    /* no LUT buffer attached */
+    if ((bitwidth < 1) || (bitwidth > 16))
+        return -1;    /* entries are unsigned short; also keeps the shift below well defined */
+    if (!en_merge && (num_pnts == 1))
+        return -1;    /* the grid step would divide by (num_pnts - 1) == 0 */
+
+    value_max = (1 << bitwidth) - 1;
+    for (nir = 0; nir < num_pnts; nir++)
+        for (nig = 0; nig < num_pnts; nig++)
+            for (nib = 0; nib < num_pnts; nib++, ptr_lut += 3) {
+                unsigned short rgb[3];
+                MATFLOAT rgb_inp[3], rgb_out[3];
+
+                /* input is either the existing LUT entry (merge) or the regular grid node */
+                rgb[0] = en_merge ? ptr_lut[0] : (nir * value_max) / (num_pnts - 1);
+                rgb[1] = en_merge ? ptr_lut[1] : (nig * value_max) / (num_pnts - 1);
+                rgb[2] = en_merge ? ptr_lut[2] : (nib * value_max) / (num_pnts - 1);
+                cs_short2flt_rgb(rgb, rgb_inp, value_max);
+                cvd_rgb_to_rgb(ptr_cvd_map, rgb_inp, rgb_out);
+                cs_flt2short_rgb(rgb_out, ptr_lut, value_max);
+            }
+    return 0;
+}
diff --git a/src/amd/gmlib/gm/cvd_api_funcs.h b/src/amd/gmlib/gm/cvd_api_funcs.h
new file mode 100755
index 00000000000..16692a470c4
--- /dev/null
+++ b/src/amd/gmlib/gm/cvd_api_funcs.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : cvd_api_funcs.h
+ * Purpose    : Color Vision Deficiency functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : January 21, 2020
+ * Version    : 1.0
+ *----------------------------------------------------------------------
+ *
+ */
+
+#pragma once
+
+#include "cvd_funcs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct s_cvd_api_opts {    /* defaults are set by cvd_api_set_def() */
+    /* cvd parameters */
+    enum cvd_mode        mode;        /* CVD mode: 0 - NONE, 1 - 3 sliders, 2 - 1 slider*/
+    MATFLOAT            gain[3];    /* Compensation Gain: ([0] - Protanopia, [1] - Deuteranopia, [2] - Tritanopia: [0.0,2.0]=0.0 */
+    struct s_cs_opts    cs_opts;    /* Color Space parameters */
+    /* 3DLUT parameters */
+    int                en_merge_3dlut;    /* non-zero: cvd_api_gen_3dlut() reads each existing LUT entry as input and overwrites it; default 0 */
+    int                num_pnts_3dlut;    /* points per axis, the LUT holds num_pnts^3 RGB triples; default 17 */
+    int                bitwidth_3dlut;    /* bits per entry, maximum code is (1 << bitwidth) - 1; default 12 */
+    unsigned short    *ptr_3dlut_rgb;    /* LUT buffer, default 0; cvd_api_gen_3dlut() returns -1 while it is 0 */
+};
+
+void cvd_api_set_def(struct s_cvd_api_opts *ptr_api_cvd_opts);
+
+int cvd_api_gen_map(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map);
+int cvd_api_gen_3dlut(struct s_cvd_api_opts *ptr_api_cvd_opts, struct s_cvd_map *ptr_cvd_map);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/cvd_funcs.c b/src/amd/gmlib/gm/cvd_funcs.c
new file mode 100755
index 00000000000..a3878a466de
--- /dev/null
+++ b/src/amd/gmlib/gm/cvd_funcs.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : cvd_funcs.c
+ * Purpose    : Color Vision Deficiency functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : January 21, 2020
+ * Version    : 1.0
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "cvd_funcs.h"
+
+void cvd_ctor(struct s_cvd_map *ptr_cvd_map)
+{
+    cvd_set_def(ptr_cvd_map);    /* construction only loads the defaults (mode and gains) */
+}
+
+void cvd_dtor(struct s_cvd_map *ptr_cvd_map)
+{
+    cvd_set_def(ptr_cvd_map);    /* nothing to release: the map is simply put back to its defaults */
+}
+
+/* Reset the CVD map: correction off (ECM_NONE) and all three gains 0.0; color_space is left as is. */
+void cvd_set_def(struct s_cvd_map *ptr_cvd_map)
+{
+    int idx = 0;
+
+    ptr_cvd_map->mode = ECM_NONE;
+
+    while (idx < 3)
+        ptr_cvd_map->gain[idx++] = 0.0;
+}
+
+/* Compensate one RGB triplet for CVD; with mode ECM_NONE the input is copied to the output. */
+int cvd_rgb_to_rgb(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{
+    if (ptr_cvd_map->mode == ECM_NONE) {
+        /* correction disabled: pass the color through and report success */
+        mat_copy(rgb_inp, rgb_out, 3);
+        return 0;
+    }
+    /* every other mode is handled by daltonization; its return code is forwarded */
+    return cvd_rgb_to_rgb_dalton(ptr_cvd_map, rgb_inp, rgb_out);
+}
+
+void cvd_model_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3],
+    enum cvd_type type)    /* NOTE(review): ptr_color_space is not referenced by the body */
+{
+    static MATFLOAT cvd_mat_rgb2lms[3][3] = {    /* first stage: RGB -> LMS */
+        {17.8824, 43.5161, 4.11935},
+        {3.45565, 27.1554, 3.86714},
+        {0.0299566, 0.184309, 1.46709}
+    };
+    static MATFLOAT cvd_mat_lms2rgb[3][3] = {    /* last stage: LMS -> RGB */
+        { 0.080944, -0.130504, 0.116721},
+        {-0.0102485, 0.0540194, -0.113615},
+        {-0.000365294, -0.00412163, 0.693513}
+    };
+    static MATFLOAT cvd_mat_model[ECVDT_NUM][3][3] = {    /* one LMS-domain model matrix per deficiency type */
+        {/* protanopia */     {0.0, 2.02324, -2.52581}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}},
+        {/* deuteranopia */   {1.0, 0.0, 0.0}, {0.494207, 0.0, 1.24827}, {0.0, 0.0, 1.0}},
+//      {/* tritanopia */     {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {-0.395913, 0.801109, 0.0}}
+        {/* tritanopia */     {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {-0.012245, 0.0720345, 0.0}}
+    };
+    /* pipeline: rgb_inp -> LMS, apply the model matrix selected by type, back to RGB, clamp to [0.0,1.0] */
+    MATFLOAT lms_inp[3], lms_out[3];
+
+    mat_eval_3x3(cvd_mat_rgb2lms, rgb_inp, lms_inp);
+    mat_eval_3x3(cvd_mat_model[type], lms_inp, lms_out);    /* type indexes the table directly: must be < ECVDT_NUM */
+    mat_eval_3x3(cvd_mat_lms2rgb, lms_out, rgb_out);
+    cs_clamp_rgb(rgb_out, 0.0, 1.0);
+}
+
+int cvd_rgb_to_rgb_dalton(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{
+    static MATFLOAT cvd_mat_err[ECVDT_NUM][3][3] = {    /* per-type matrix applied to the model error */
+        {/* protanopia */      {-0.5, 0.0, 0.0}, {1.0,  1.0, 0.0}, {1.0, 0.0,  1.0}},
+        {/* deuteranopia */    { 1.0, 1.0, 0.0}, {0.0, -0.5, 0.0}, {0.0, 1.0,  1.0}},
+        {/* tritanopia */      { 1.0, 0.0, 1.0}, {0.0,  1.0, 1.0}, {0.0, 0.0, -0.5}}
+    };
+
+    MATFLOAT rgb_inp_lin[3], rgb_out_lin[3];
+    MATFLOAT rgb_err_map[ECVDT_NUM][3];    /* mapped error, one RGB triplet per deficiency type */
+    MATFLOAT err_map;
+    MATFLOAT gain;
+    int nc, nk;
+
+    cs_gamma_rgb(rgb_inp, rgb_inp_lin, ptr_cvd_map->color_space.gamma_parm, EGD_NONLIN_2_LIN);    /* all math below is on linear values */
+    mat_copy(rgb_inp_lin, rgb_out_lin, 3);    /* output starts as the unmodified input */
+
+    for (nk = 0; nk < 3; nk++) {    /* nk doubles as the enum cvd_type value */
+        MATFLOAT rgb_cvd[3], rgb_err[3];
+
+        cvd_model_rgb(&ptr_cvd_map->color_space, rgb_inp_lin, rgb_cvd, nk);
+        for (nc = 0; nc < 3; nc++)
+            rgb_err[nc] = rgb_inp_lin[nc] - rgb_cvd[nc];    /* error = input minus modelled color */
+        mat_eval_3x3(cvd_mat_err[nk], rgb_err, rgb_err_map[nk]);
+    }
+
+    if (ptr_cvd_map->mode == ECM_DALTON_SLD3) {    /* ECM_DALTON_SLD3 */
+        for (nk = 0; nk < 3; nk++) {
+            gain = ptr_cvd_map->gain[nk] * 0.5;    /* each slider contributes half of its gain */
+            for (nc = 0; nc < 3; nc++)
+                rgb_out_lin[nc] += rgb_err_map[nk][nc] * gain;
+        }
+    } else {    /* ECM_DALTON_SLD1 (any mode other than SLD3 lands here); only gain[0] is read */
+        for (nc = 0; nc < 3; nc++) {
+            if (ptr_cvd_map->gain[0] <= 1.0)    /* up to 1.0: scaled protanopia error map */
+                err_map = ptr_cvd_map->gain[0] * rgb_err_map[0][nc];
+            else if (ptr_cvd_map->gain[0] <= 2.0)    /* (1.0,2.0]: blend from protanopia to deuteranopia map */
+                err_map = rgb_err_map[0][nc] + (ptr_cvd_map->gain[0] - 1.0) * (rgb_err_map[1][nc] - rgb_err_map[0][nc]);
+            else    /* above 2.0: blend from deuteranopia towards tritanopia map */
+                err_map = rgb_err_map[1][nc] + (ptr_cvd_map->gain[0] - 2.0) * (rgb_err_map[2][nc] - rgb_err_map[1][nc]);
+            rgb_out_lin[nc] += err_map;
+        }
+    }
+
+    cs_clamp_rgb(rgb_out_lin, 0.0, 1.0);
+    cs_gamma_rgb(rgb_out_lin, rgb_out, ptr_cvd_map->color_space.gamma_parm, EGD_LIN_2_NONLIN);
+
+    return 0;    /* no failure path: always 0 */
+}
diff --git a/src/amd/gmlib/gm/cvd_funcs.h b/src/amd/gmlib/gm/cvd_funcs.h
new file mode 100755
index 00000000000..ea263746de9
--- /dev/null
+++ b/src/amd/gmlib/gm/cvd_funcs.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : cvd_funcs.h
+ * Purpose    : Color Vision Deficiency functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : January 21, 2020
+ * Version    : 1.0
+ *----------------------------------------------------------------------
+ *
+ */
+
+#pragma once
+
+#include "cs_funcs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum cvd_mode {
+    ECM_NONE = 0,    /* NONE: cvd_rgb_to_rgb() copies the input unchanged */
+    ECM_DALTON_SLD3 = 1,    /* DALTONIZATION 3 control sliders: gain[0..2], one per deficiency type */
+    ECM_DALTON_SLD1 = 2,    /* DALTONIZATION 1 control slider: only gain[0] is used */
+    ECM_NUM = 3    /* number of modes */
+};
+
+enum cvd_type {
+    ECVDT_PROTANOPIA = 0,    /* protanopia */
+    ECVDT_DEUTERANOPIA = 1,    /* deuteranopia */
+    ECVDT_TRITANOPIA = 2,    /* tritanopia */
+    ECVDT_NUM = 3    /* number of types; sizes the per-type matrix tables in cvd_funcs.c */
+};
+
+struct s_cvd_map {
+    /* input parameters */
+    enum cvd_mode            mode;            /* Enable/disable CVD: {0,1,2}=0 (see enum cvd_mode) */
+    MATFLOAT                gain[3];        /* Compensation Gain: [0] - Protanopia, [1] - Deuteranopia, [2] - Tritanopia; range [0.0,2.0], default 0.0 */
+    struct s_color_space    color_space;    /* Color Space (primary RGBW chromaticity, gamma, and Luminance min/max) */
+};
+
+/* constructor and destructor */
+void cvd_ctor(struct s_cvd_map *ptr_cvd_map);
+void cvd_dtor(struct s_cvd_map *ptr_cvd_map);
+
+void cvd_set_def(struct s_cvd_map *ptr_cvd_map);
+
+int cvd_rgb_to_rgb(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+void cvd_model_rgb(struct s_color_space *ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3],
+    enum cvd_type type);
+int cvd_rgb_to_rgb_dalton(struct s_cvd_map *ptr_cvd_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/gm_api_funcs.c b/src/amd/gmlib/gm/gm_api_funcs.c
new file mode 100755
index 00000000000..a16e4dc45b9
--- /dev/null
+++ b/src/amd/gmlib/gm/gm_api_funcs.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : gm_api_funcs.c
+ * Purpose    : Gamut Mapping API functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : November 12, 2024
+ * Version    : 3.1
+ *----------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "gm_api_funcs.h"
+
+/* non library helper functions */
+/*
+    // SESSION START
+    struct s_gamut_map gamut_map;
+    gm_ctor(&gamut_map, gm_api_alloc, gm_api_free, mem_ctx);    // constructor - once per session; mem_ctx is handed back to alloc/free
+
+    struct s_gm_opts gm_opts;
+    gm_api_set_def(&gm_opts);                // set default mapping
+    gm_api_gen_map(&gm_opts, &gamut_map);    // generate default mapping
+
+    gm_opts.ptr_3dlut_rgb = (unsigned short *)gamut_map.ptr_func_alloc(
+        3 * sizeof(unsigned short) * gm_opts.num_pnts_3dlut * gm_opts.num_pnts_3dlut * gm_opts.num_pnts_3dlut, gamut_map.memory_context);    // allocate 3DLUT memory
+
+    SOURCE OR TARGET GAMUT IS CHANGED EVENT
+    {
+        // ...................
+        // set parameters of src gamut, dst gamut and gamut mapping
+        // ...................
+        // GM_UPDATE_SRC - source gamut has been changed
+        // GM_UPDATE_DST - destination gamut or mapping parameters have been changed
+        gm_opts.update_msk = GM_UPDATE_SRC;
+        // or
+        gm_opts.update_msk = GM_UPDATE_DST;
+
+        int rc = gm_api_gen_map(&gm_opts, &gamut_map);
+        if (rc == 0) {
+            rc = gm_api_gen_3dlut(&gm_opts, &gamut_map);        // generate 3DLUT
+//            .................
+//            load 3DLUT to HW registers
+//            .................
+        }
+    }
+
+    // SESSION END
+    gamut_map.ptr_func_free(gm_opts.ptr_3dlut_rgb, gamut_map.memory_context);    // free 3DLUT memory
+    gm_dtor(&gamut_map);        // destructor - once per session
+*/
+
+int gm_api_gen_map(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map)
+{
+    int rc;
+
+    /* initialize gamut mapping structure from api gamut options (only when GM_UPDATE_DST is set) */
+    if (ptr_gm_opts->update_msk & GM_UPDATE_DST)
+        gm_api_init(ptr_gm_opts, ptr_gamut_map);
+
+    /* init src and dst gamuts */
+    rc = gm_init_gamuts(ptr_gamut_map, &ptr_gm_opts->cs_opts_src, &ptr_gm_opts->cs_opts_dst,
+        ptr_gm_opts->mode, ptr_gm_opts->update_msk);
+
+    /* generate gamut edge and other internal data */
+    if (rc == 0)
+        gm_gen_map(ptr_gamut_map, ptr_gm_opts->update_msk);
+
+    ptr_gm_opts->update_msk = 0;    /* the request mask is consumed even when rc != 0 */
+
+    return rc;    /* return code of gm_init_gamuts() */
+}
+
+/* Generate the 3DLUT into ptr_gm_opts->ptr_3dlut_rgb; returns 0, or -1 when that buffer pointer is not set. */
+int gm_api_gen_3dlut(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map)
+{
+    if (!ptr_gm_opts->ptr_3dlut_rgb)
+        return -1; /* something wrong */
+    gm_gen_3dlut(ptr_gamut_map, ptr_gm_opts->num_pnts_3dlut, ptr_gm_opts->bitwidth_3dlut,
+        ptr_gm_opts->en_merge_3dlut, ptr_gm_opts->ptr_3dlut_rgb);
+    return 0;
+}
+
+/* Fill *ptr_gm_opts with the API defaults; ptr_3dlut_rgb is not written. */
+void gm_api_set_def(struct s_gm_opts *ptr_gm_opts)
+{
+    int idx;
+
+    /* modes: no gamut mapping, no hue rotation, no pixel debugging, color scaling on */
+    ptr_gm_opts->gamut_map_mode = EGMM_NONE;
+    ptr_gm_opts->hue_rot_mode = EHRM_NONE;
+    ptr_gm_opts->show_pix_mode = ESPM_NONE;
+    ptr_gm_opts->en_tm_scale_color = 1;
+    ptr_gm_opts->mode = 0;
+    ptr_gm_opts->reserve = 0;
+    /* sampling precision and grid sizes */
+    ptr_gm_opts->map_type = EMT_SEG;
+    ptr_gm_opts->step_samp = 0.0005;
+    ptr_gm_opts->num_hue_pnts = 180;
+    ptr_gm_opts->num_edge_pnts = 121;
+    ptr_gm_opts->num_int_pnts = 33;
+    /* tuning factors are taken from the shared default table */
+    ptr_gm_opts->org2_perc_c = GM_ORG2_PERC;
+    for (idx = 0; idx < GM_NUM_PRIM; idx++) {
+        ptr_gm_opts->vec_org1_factor[idx] = gm_vec_org13_factor_def[idx][0];
+        ptr_gm_opts->vec_org3_factor[idx] = gm_vec_org13_factor_def[idx][1];
+    }
+    ptr_gm_opts->show_pix_hue_limits[0] = 0.0;
+    ptr_gm_opts->show_pix_hue_limits[1] = 0.0;
+    /* default color spaces on both sides, both flagged for generation */
+    cs_set_opts_def(&ptr_gm_opts->cs_opts_src);
+    cs_set_opts_def(&ptr_gm_opts->cs_opts_dst);
+    ptr_gm_opts->update_msk = GM_UPDATE_SRC | GM_UPDATE_DST;
+    /* 3DLUT: merge off, 17 points, 12 bits */
+    ptr_gm_opts->en_merge_3dlut = 0;
+    ptr_gm_opts->num_pnts_3dlut = 17;
+    ptr_gm_opts->bitwidth_3dlut = 12;
+}
+
+void gm_api_init(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map)
+{
+    int nk;
+
+    gm_set_def(ptr_gamut_map);    /* start from library defaults; fields not copied below (e.g. edge_type) keep them */
+
+    ptr_gamut_map->gamut_map_mode = ptr_gm_opts->gamut_map_mode;
+    ptr_gamut_map->en_tm_scale_color = ptr_gm_opts->en_tm_scale_color;
+    ptr_gamut_map->hue_rot_mode = ptr_gm_opts->hue_rot_mode;
+    ptr_gamut_map->mode = ptr_gm_opts->mode;
+    ptr_gamut_map->org2_perc_c = ptr_gm_opts->org2_perc_c;
+
+    for (nk = 0; nk < GM_NUM_PRIM; nk++) {
+        /* Factor of Origin1, one entry per primary; defaults come from gm_vec_org13_factor_def[nk][0] */
+        ptr_gamut_map->vec_org1_factor[nk] = ptr_gm_opts->vec_org1_factor[nk];
+        /* Factor of Origin3, one entry per primary; defaults come from gm_vec_org13_factor_def[nk][1] */
+        ptr_gamut_map->vec_org3_factor[nk] = ptr_gm_opts->vec_org3_factor[nk];
+    }
+
+    ptr_gamut_map->step_samp = ptr_gm_opts->step_samp;            /* gm_api_set_def() default is 0.0005 */
+    ptr_gamut_map->map_type = ptr_gm_opts->map_type;            /* gm_api_set_def() default is EMT_SEG */
+    ptr_gamut_map->num_hue_pnts = ptr_gm_opts->num_hue_pnts;    /* gm_api_set_def() default is 180 */
+    ptr_gamut_map->num_edge_pnts = ptr_gm_opts->num_edge_pnts;  /* gm_api_set_def() default is 121 */
+    ptr_gamut_map->num_int_pnts = ptr_gm_opts->num_int_pnts;    /* gm_api_set_def() default is 33 */
+
+    ptr_gamut_map->reserve = ptr_gm_opts->reserve;
+    ptr_gamut_map->show_pix_mode = ptr_gm_opts->show_pix_mode;
+
+    for (nk = 0; nk < 2; nk++)
+        ptr_gamut_map->show_pix_hue_limits[nk] = ptr_gm_opts->show_pix_hue_limits[nk];
+}
+
+#ifndef GM_SIM
+#ifndef LINUX_DM
+#include "dm_services.h"
+#else
+/* TBD: include for LINUX_DM */
+#endif /* LINUX_DM */
+#else
+#include <stdlib.h>
+#endif /* GM_SIM */
+
+/* Default allocation callback for gm_ctor(): returns size_bytes of memory, or 0 when none is available. */
+void *gm_api_alloc(unsigned int size_bytes, void* mem_ctx)
+{
+    (void)mem_ctx; /* part of the callback signature, not needed by these allocators */
+#if defined(GM_SIM)
+    return malloc(size_bytes);
+#elif !defined(LINUX_DM)
+    return dm_alloc(size_bytes);
+#else
+    return 0; /* TBD: alloc() for LINUX_DM - report failure instead of returning an indeterminate value */
+#endif /* GM_SIM / LINUX_DM */
+}
+
+/*
+ * Default release callback, counterpart of gm_api_alloc(); mem_ctx is ignored.
+ */
+void gm_api_free(void *ptr_mem, void* mem_ctx)
+{
+#if defined(GM_SIM)
+    free(ptr_mem);
+#elif !defined(LINUX_DM)
+    dm_free(ptr_mem);
+#else
+    /* TBD: free() for LINUX_DM */
+#endif /* GM_SIM / LINUX_DM */
+}
diff --git a/src/amd/gmlib/gm/gm_api_funcs.h b/src/amd/gmlib/gm/gm_api_funcs.h
new file mode 100755
index 00000000000..902d4ba8382
--- /dev/null
+++ b/src/amd/gmlib/gm/gm_api_funcs.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : gm_api_funcs.h
+ * Purpose    : Gamut Mapping API functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : November 12, 2024
+ * Version    : 3.1
+ *----------------------------------------------------------------------
+ *
+*/
+
+#pragma once
+
+#include "gm_funcs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct s_gm_opts {
+    enum gm_gamut_map_mode    gamut_map_mode;
+    /* Gamut Map Mode: 0 - no gamut map, 1 - Tone Map BT2390-4, 2 - TM+CHTO, 3 - TM+CHSO, 4 - TM+CHCI */
+    enum gm_hue_rot_mode      hue_rot_mode;
+    /* Hue Rotation Mode: 0 - none, 1 - hue rotation, 2 - chroma compression, 3 - hue rotation and chroma compression */
+    int                       en_tm_scale_color;
+    /* Enable/Disable Color Scaling (valid for Tone Mapping mode only): {0,1} = 1    */
+    unsigned int              mode;
+    /* mode = 0 : Reserved for modifications of the Gamut Map algo */
+    /* CHTO tuning parameters */
+    MATFLOAT                  org2_perc_c;
+    /* Origin2 percentage gap for chroma [0.7,0.95]; gm_api_set_def() assigns GM_ORG2_PERC */
+    MATFLOAT                  vec_org1_factor[GM_NUM_PRIM];
+    /* Factor of Origin1 for M,R,Y,G,C,B [1.0,1.4]; defaults are gm_vec_org13_factor_def[][0] */
+    MATFLOAT                  vec_org3_factor[GM_NUM_PRIM];
+    /* Factor of Origin3 for M,R,Y,G,C,B [1.01,1.2]; defaults are gm_vec_org13_factor_def[][1] */
+    MATFLOAT                  step_samp;
+    /* Sampling precision in IC space for edge search [0.00001,0.001]; gm_api_set_def() uses 0.0005 */
+    enum gm_map_type          map_type;
+    /* Map type: 0 - segments intersection SEG, 1 - radius sampling RAD, 2 hybrid - SEG+RAD; gm_api_set_def() uses EMT_SEG */
+    int                       num_hue_pnts;
+    /* Number of hue grid points: [90,360]; gm_api_set_def() uses 180 */
+    int                       num_edge_pnts;
+    /* Number of edge IC grid points: [91, 181]; gm_api_set_def() uses 121 */
+    int                       num_int_pnts;
+    /* Number of intensity grid points for primary hues: [5,33] = 33 */
+    /* show pixel parameters */
+    int                       reserve;
+    /* Reserved for debugging purpose = 0 */
+    enum gm_show_pix_mode     show_pix_mode;
+    /* EShowPixMode: [0,8]=0 : show pixel debugging mode */
+    MATFLOAT                  show_pix_hue_limits[2];
+    /* Show Pixel mode hue ranges */
+    /* color space parameters */
+    struct s_cs_opts          cs_opts_src;
+    struct s_cs_opts          cs_opts_dst;
+    int                       update_msk;
+    /* Update mask: GM_UPDATE_SRC - update source gamut, GM_UPDATE_DST - update destination gamut; cleared by gm_api_gen_map() */
+    /* 3DLUT parameters */
+    int                       en_merge_3dlut;    /* forwarded to gm_gen_3dlut(); gm_api_set_def() uses 0 */
+    int                       num_pnts_3dlut;    /* forwarded to gm_gen_3dlut(); gm_api_set_def() uses 17 */
+    int                       bitwidth_3dlut;    /* forwarded to gm_gen_3dlut(); gm_api_set_def() uses 12 */
+    unsigned short            *ptr_3dlut_rgb;    /* 3DLUT buffer; gm_api_gen_3dlut() returns -1 while this is 0 */
+};
+
+int gm_api_gen_map(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map);
+int gm_api_gen_3dlut(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map);
+
+void gm_api_set_def(struct s_gm_opts *ptr_gm_opts);
+void gm_api_init(struct s_gm_opts *ptr_gm_opts, struct s_gamut_map *ptr_gamut_map);
+
+void *gm_api_alloc(unsigned int size_bytes, void* mem_ctx); /* alloc array */
+void gm_api_free(void *ptr_mem, void* mem_ctx); /* free array */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/gm_funcs.c b/src/amd/gmlib/gm/gm_funcs.c
new file mode 100755
index 00000000000..ebc72f90b1f
--- /dev/null
+++ b/src/amd/gmlib/gm/gm_funcs.c
@@ -0,0 +1,1492 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : gm_funcs.c
+ * Purpose    : Gamut Mapping functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : November 12, 2024
+ * Version    : 3.1
+ *---------------------------------------------------------------------
+ *
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "gm_funcs.h"
+
+static float gm_lin2pq[GM_PQTAB_NUMPNTS];
+static float gm_pq2lin[GM_PQTAB_NUMPNTS];
+
+void gm_ctor(struct s_gamut_map *ptr_gamut_map, void*(*ptr_func_alloc)(unsigned int, void*), void(*ptr_func_free)(void *, void*), void* mem_context)
+{
+    ptr_gamut_map->ptr_func_alloc = ptr_func_alloc;    /* callbacks used for every internal buffer */
+    ptr_gamut_map->ptr_func_free = ptr_func_free;
+    ptr_gamut_map->memory_context = mem_context;    /* handed back as the last argument of both callbacks */
+    ptr_gamut_map->ptr_edge_ic = 0;    /* no buffer is owned yet: gm_alloc_mem() only allocates pointers that are 0 */
+    ptr_gamut_map->ptr_hr_src_hc = 0;
+    ptr_gamut_map->ptr_hr_dst_hc = 0;
+    ptr_gamut_map->ptr_org2_ic = 0;
+    ptr_gamut_map->ptr_org3_ic = 0;
+    ptr_gamut_map->ptr_cusp_src_ic = 0;
+    ptr_gamut_map->ptr_cusp_dst_ic = 0;
+
+    gm_gen_pq_lut(gm_lin2pq, GM_PQTAB_NUMPNTS, EGD_LIN_2_NONLIN);    /* file-scope tables, refilled on every constructor call */
+    gm_gen_pq_lut(gm_pq2lin, GM_PQTAB_NUMPNTS, EGD_NONLIN_2_LIN);
+    gm_set_def(ptr_gamut_map);    /* load default mapping parameters */
+}
+
+void gm_dtor(struct s_gamut_map *ptr_gamut_map)
+{
+    gm_free_mem(ptr_gamut_map);    /* must run first: it still needs ptr_func_free */
+
+    ptr_gamut_map->ptr_func_alloc = 0;    /* callbacks are dropped; memory_context is left as it was */
+    ptr_gamut_map->ptr_func_free = 0;
+}
+
+/*
+ * Allocate, through the registered callback, every work buffer the current modes need
+ * and whose pointer is still 0 (edge table is skipped for EMT_RAD). Results are stored unchecked.
+ */
+void gm_alloc_mem(struct s_gamut_map *ptr_gamut_map)
+{
+    /* sizes in bytes: 2 MATFLOAT values per hue point, and 2 per intensity point of each primary */
+    unsigned int size_hue_ic = ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT);
+    unsigned int size_prim_hc = GM_NUM_PRIM * ptr_gamut_map->num_int_pnts * 2 * sizeof(MATFLOAT);
+
+    if (ptr_gamut_map->gamut_map_mode > EGMM_TM) {
+        if ((ptr_gamut_map->map_type != EMT_RAD) && !ptr_gamut_map->ptr_edge_ic)
+            ptr_gamut_map->ptr_edge_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(
+                ptr_gamut_map->num_hue_pnts * ptr_gamut_map->num_edge_pnts * 2 * sizeof(MATFLOAT),
+                ptr_gamut_map->memory_context);
+
+        if (!ptr_gamut_map->ptr_org2_ic)
+            ptr_gamut_map->ptr_org2_ic =
+                (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(size_hue_ic, ptr_gamut_map->memory_context);
+
+        if (!ptr_gamut_map->ptr_org3_ic)
+            ptr_gamut_map->ptr_org3_ic =
+                (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(size_hue_ic, ptr_gamut_map->memory_context);
+    }
+
+    if (ptr_gamut_map->hue_rot_mode != EHRM_NONE) {
+        if (!ptr_gamut_map->ptr_hr_src_hc)
+            ptr_gamut_map->ptr_hr_src_hc =
+                (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(size_prim_hc, ptr_gamut_map->memory_context);
+
+        if (!ptr_gamut_map->ptr_hr_dst_hc)
+            ptr_gamut_map->ptr_hr_dst_hc =
+                (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(size_prim_hc, ptr_gamut_map->memory_context);
+    }
+
+    if (!ptr_gamut_map->ptr_cusp_src_ic)
+        ptr_gamut_map->ptr_cusp_src_ic =
+            (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(size_hue_ic, ptr_gamut_map->memory_context);
+
+    if (!ptr_gamut_map->ptr_cusp_dst_ic)
+        ptr_gamut_map->ptr_cusp_dst_ic =
+            (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(size_hue_ic, ptr_gamut_map->memory_context);
+}
+
+void gm_free_mem(struct s_gamut_map *ptr_gamut_map)
+{
+    if (ptr_gamut_map->ptr_edge_ic) {    /* each buffer: free only if set, then reset to 0 so the call can be repeated */
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_edge_ic = 0;
+    }
+
+    if (ptr_gamut_map->ptr_hr_src_hc) {    /* allocated by gm_alloc_mem() when hue_rot_mode != EHRM_NONE */
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_hr_src_hc, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_hr_src_hc = 0;
+    }
+
+    if (ptr_gamut_map->ptr_hr_dst_hc) {
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_hr_dst_hc, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_hr_dst_hc = 0;
+    }
+
+    if (ptr_gamut_map->ptr_org2_ic) {    /* allocated by gm_alloc_mem() when gamut_map_mode > EGMM_TM */
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_org2_ic, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_org2_ic = 0;
+    }
+
+    if (ptr_gamut_map->ptr_org3_ic) {
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_org3_ic, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_org3_ic = 0;
+    }
+
+    if (ptr_gamut_map->ptr_cusp_src_ic) {    /* cusp buffers are allocated by gm_alloc_mem() in every mode */
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_cusp_src_ic, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_cusp_src_ic = 0;
+    }
+
+    if (ptr_gamut_map->ptr_cusp_dst_ic) {
+        ptr_gamut_map->ptr_func_free(ptr_gamut_map->ptr_cusp_dst_ic, ptr_gamut_map->memory_context);
+        ptr_gamut_map->ptr_cusp_dst_ic = 0;
+    }
+}
+
+/* Reset the tunable mapping parameters to the built-in defaults; buffers, callbacks and color spaces are not touched. */
+void gm_set_def(struct s_gamut_map *ptr_gamut_map)
+{
+    int idx;
+    ptr_gamut_map->gamut_map_mode = EGMM_NONE;
+    ptr_gamut_map->hue_rot_mode = EHRM_NONE;
+    ptr_gamut_map->show_pix_mode = ESPM_NONE;
+    ptr_gamut_map->en_tm_scale_color = 1;
+    ptr_gamut_map->mode = 0;
+    ptr_gamut_map->reserve = 0;
+    ptr_gamut_map->edge_type = EET_RAD;
+    ptr_gamut_map->map_type = EMT_SEG;
+    ptr_gamut_map->step_samp = GM_STEP_SAMP;
+    ptr_gamut_map->num_hue_pnts = GM_NUM_HUE;
+    ptr_gamut_map->num_edge_pnts = GM_NUM_EDGE;
+    ptr_gamut_map->num_int_pnts = GM_NUM_INT;
+    ptr_gamut_map->org2_perc_c = GM_ORG2_PERC;
+    ptr_gamut_map->show_pix_hue_limits[0] = 0.0;
+    ptr_gamut_map->show_pix_hue_limits[1] = 0.0;
+    for (idx = 0; idx < GM_NUM_PRIM; idx++) {
+        ptr_gamut_map->vec_org1_factor[idx] = gm_vec_org13_factor_def[idx][0];
+        ptr_gamut_map->vec_org3_factor[idx] = gm_vec_org13_factor_def[idx][1];
+    }
+}
+
+/* Init color spaces, buffers and cusp data selected by update_msk; returns 0, or -1 when a buffer needed here is missing. */
+int gm_init_gamuts(struct s_gamut_map *ptr_gamut_map, struct s_cs_opts *ptr_cs_opts_src,
+    struct s_cs_opts *ptr_cs_opts_dst, unsigned int gm_mode, int update_msk)
+{
+    struct s_color_space *ptr_cs_src = &ptr_gamut_map->color_space_src;
+    struct s_color_space *ptr_cs_dst = &ptr_gamut_map->color_space_dst;
+    int upd_src = (update_msk & GM_UPDATE_SRC) != 0;
+    int upd_dst = (update_msk & GM_UPDATE_DST) != 0;
+
+    if (upd_src) { /* init and generate prim and cusp points for source gamut */
+        cs_init(ptr_cs_opts_src, ptr_cs_src);
+        cs_genprim_itp(ptr_cs_src, GM_NUM_PRIM, (MATFLOAT *)gm_vec_cusp_rgb, ptr_gamut_map->vec_prim_src_ich);
+    }
+
+    if (upd_dst) { /* init and generate prim and cusp points for target gamut */
+        cs_init(ptr_cs_opts_dst, ptr_cs_dst);
+        cs_genprim_itp(ptr_cs_dst, GM_NUM_PRIM, (MATFLOAT *)gm_vec_cusp_rgb, ptr_gamut_map->vec_prim_dst_ich);
+    }
+
+    /* calculate Luma Min/Max for Tone Mapping: destination limits normalized to the source luma range */
+    if (upd_src || upd_dst) {
+        MATFLOAT luma_rng_src = ptr_cs_src->luma_limits[1] - ptr_cs_src->luma_limits[0];
+        ptr_gamut_map->lum_min = (ptr_cs_dst->luma_limits[0] - ptr_cs_src->luma_limits[0]) / luma_rng_src;
+        ptr_gamut_map->lum_max = (ptr_cs_dst->luma_limits[1] - ptr_cs_src->luma_limits[0]) / luma_rng_src;
+    }
+
+    if (upd_dst) { /* grid sizes and modes may have changed: rebuild all work buffers */
+        gm_free_mem(ptr_gamut_map);
+        gm_alloc_mem(ptr_gamut_map);
+    }
+
+    if (upd_src || upd_dst) { /* buffers are reallocated on GM_UPDATE_DST, so source data is regenerated then as well */
+        /* gm_alloc_mem() stores allocation results unchecked: stop before a missing buffer is written */
+        if (!ptr_gamut_map->ptr_cusp_src_ic || !ptr_gamut_map->ptr_cusp_dst_ic)
+            return -1;
+        if (ptr_gamut_map->hue_rot_mode != EHRM_NONE) {    /* generate prim for intensity points */
+            if (!ptr_gamut_map->ptr_hr_src_hc || !ptr_gamut_map->ptr_hr_dst_hc)
+                return -1;
+            gm_genprim_hc(ptr_cs_src, ptr_gamut_map->ptr_hr_src_hc, ptr_gamut_map->num_int_pnts,
+                ptr_cs_dst->luma_limits, ptr_gamut_map->lum_min, ptr_gamut_map->lum_max);
+            if (upd_dst)
+                gm_genprim_hc(ptr_cs_dst, ptr_gamut_map->ptr_hr_dst_hc, ptr_gamut_map->num_int_pnts,
+                    ptr_cs_dst->luma_limits, 0.0, 1.0); /* no TM */
+        }
+
+        gm_gencusp_ic(ptr_gamut_map, 0); /* generate cusp points for source gamut */
+        if (upd_dst)
+            gm_gencusp_ic(ptr_gamut_map, 1); /* generate cusp points for target gamut */
+    }
+
+    ptr_gamut_map->mode = gm_mode;
+    ptr_gamut_map->hue_max = 2.0 * mat_get_pi() * (1.0 - 1.0 / (MATFLOAT)ptr_gamut_map->num_hue_pnts);
+    ptr_gamut_map->org1 = mat_denorm(GM_ORG1_FACTOR, ptr_cs_dst->luma_limits[0], ptr_cs_dst->luma_limits[2]);
+    ptr_gamut_map->org3 = mat_denorm(GM_ORG3_FACTOR, ptr_cs_dst->luma_limits[0], ptr_cs_dst->luma_limits[2]);
+
+    return 0;
+}
+
+/* Validate luminance limits for mapping: on a violation both mapping modes are switched off and -1 is returned. */
+int gm_check_gamut(struct s_gamut_map *ptr_gamut_map)
+{
+    struct s_color_space *src = &ptr_gamut_map->color_space_src;
+    struct s_color_space *dst = &ptr_gamut_map->color_space_dst;
+
+    if (ptr_gamut_map->gamut_map_mode == EGMM_NONE)
+        return 0; /* mapping is off: nothing to validate */
+    if (!((src->luminance_limits[0] > dst->luminance_limits[0]) ||
+        (src->luminance_limits[1] < dst->luminance_limits[1])))
+        return 0; /* valid parameters */
+    ptr_gamut_map->gamut_map_mode = EGMM_NONE;
+    ptr_gamut_map->hue_rot_mode = EHRM_NONE;
+    return -1;    /* non valid luminance limits */
+}
+
+/* Sample cusp colors between neighbouring primaries and resample them to the uniform hue grid (color_space: 0 - src, 1 - dst). */
+void gm_gencusp_ic(struct s_gamut_map *ptr_gamut_map, int color_space)
+{
+    struct s_color_space *ptr_color_space = color_space ? &ptr_gamut_map->color_space_dst : &ptr_gamut_map->color_space_src;
+    MATFLOAT *ptr_cusp_ic = color_space ? ptr_gamut_map->ptr_cusp_dst_ic : ptr_gamut_map->ptr_cusp_src_ic;
+    int num_phases = ptr_gamut_map->num_hue_pnts / GM_NUM_PRIM;
+    int index = 0;
+    MATFLOAT *ptr_hue = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(ptr_gamut_map->num_hue_pnts * sizeof(MATFLOAT), ptr_gamut_map->memory_context);
+    MATFLOAT *ptr_ic = (MATFLOAT *)ptr_gamut_map->ptr_func_alloc(ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), ptr_gamut_map->memory_context);
+    MATFLOAT rgb[3], itp[3];
+    int np, ni, nc;
+
+    if (!ptr_hue || !ptr_ic || (num_phases <= 0))
+        goto out;    /* no scratch memory or nothing to sample: leave the cusp table untouched */
+
+    for (np = 0; np < GM_NUM_PRIM; np++) {
+        for (ni = 0; ni < num_phases; ni++) {
+            MATFLOAT phase = (MATFLOAT)ni / (MATFLOAT)num_phases;
+            int ind0 = np;
+            int ind1 = (ind0 + 1) % GM_NUM_PRIM;
+
+            for (nc = 0; nc < 3; nc++) {    /* linear blend from primary ind0 towards its neighbour ind1 */
+                MATFLOAT val0 = gm_vec_cusp_rgb[ind0][nc];
+                MATFLOAT val1 = gm_vec_cusp_rgb[ind1][nc];
+
+                rgb[nc] = val0 + (val1 - val0) * phase;
+            }
+            cs_gamma_rgb(rgb, rgb, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);    /* TBD */
+            cs_denorm_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+            cs_clamp_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]);
+            cs_rgb_to_itp(ptr_color_space, rgb, itp);
+            if ((color_space == 0) && (ptr_gamut_map->gamut_map_mode != EGMM_NONE)) { /* tm and hr for source gamut */
+                if ((ptr_gamut_map->lum_min > 0.0) || (ptr_gamut_map->lum_max < 1.0))
+                    itp[0] = gm_tm_luma(itp[0], ptr_gamut_map->color_space_src.luma_limits, ptr_gamut_map->lum_min, ptr_gamut_map->lum_max);
+                if (ptr_gamut_map->hue_rot_mode != EHRM_NONE)
+                    gm_hr_itp(ptr_gamut_map, itp, itp, 0);
+            }
+            ptr_ic[2 * index + 0] = itp[0];
+            ptr_ic[2 * index + 1] = mat_radius(itp[2], itp[1]);
+            ptr_hue[index] = mat_angle(itp[2], itp[1]);
+            index++;
+        }
+    }
+
+    /* only index samples exist (fewer than num_hue_pnts when it is not a multiple of GM_NUM_PRIM) */
+    gm_resample_hue_ic(ptr_hue, ptr_ic, ptr_cusp_ic, index, ptr_gamut_map->num_hue_pnts);
+out:
+    if (ptr_ic)
+        ptr_gamut_map->ptr_func_free(ptr_ic, ptr_gamut_map->memory_context);
+    if (ptr_hue)
+        ptr_gamut_map->ptr_func_free(ptr_hue, ptr_gamut_map->memory_context);
+}
+
+/* Generate the destination gamut edge row for hue grid index hue_ind; GM_CUSP_ADJUST additionally corrects it for the cusp. */
+void gm_gen_edge_hue(struct s_gamut_map *ptr_gamut_map, int hue_ind)
+{
+    struct s_color_space *ptr_cs_dst = &ptr_gamut_map->color_space_dst;
+    MATFLOAT *ptr_edge = &ptr_gamut_map->ptr_edge_ic[hue_ind * ptr_gamut_map->num_edge_pnts * 2];
+    MATFLOAT hue = mat_index_to_flt(hue_ind, ptr_gamut_map->hue_max, ptr_gamut_map->num_hue_pnts);
+
+    gm_genedge(ptr_cs_dst, ptr_cs_dst->luma_limits, ptr_gamut_map->num_edge_pnts, ptr_gamut_map->edge_type,
+        ptr_gamut_map->step_samp, hue, ptr_edge, ptr_gamut_map->mode & GM_PQTAB_GBD);
+
+    /* correct edge for target cusp point - optional */
+    if (ptr_gamut_map->mode & GM_CUSP_ADJUST)
+        gm_edgecusp_adjust(ptr_edge, ptr_gamut_map->num_edge_pnts, &ptr_gamut_map->ptr_cusp_dst_ic[hue_ind * 2]);
+}
+
+/* resample to uniform hue: num_hue_pnts_out value pairs at hue = ni * 2*pi / num_hue_pnts_out, linearly interpolated from the input */
+void gm_resample_hue_ic(MATFLOAT *ptr_hue, MATFLOAT *ptr_ic_inp, MATFLOAT *ptr_ic_out, int num_hue_pnts_inp, int num_hue_pnts_out)
+{
+    const MATFLOAT gm_2pi = 2.0 * mat_get_pi();
+    int index_2pi = mat_get_hue_index_2pi(ptr_hue, num_hue_pnts_inp);    /* presumably the input sample where hue wraps past 2*pi - TODO confirm */
+    int ind1 = index_2pi;    /* upper sample of the current input segment */
+    int ind0 = (ind1 > 0) ? ind1 - 1 : num_hue_pnts_inp - 1;    /* lower sample: cyclic predecessor of ind1 */
+    MATFLOAT tar_inc_out = gm_2pi / (MATFLOAT)num_hue_pnts_out;    /* uniform output hue step */
+    MATFLOAT tar_acc_out = 0.0;    /* hue of the output point being produced */
+    MATFLOAT tar_inc_inp = ptr_hue[ind1] - ptr_hue[ind0];    /* hue width of the current input segment */
+    int ni;
+
+    if (tar_inc_inp < 0.0)
+        tar_inc_inp += gm_2pi;    /* segment crosses the 0/2*pi seam */
+
+    for (ni = 0; ni < num_hue_pnts_out; ni++) {
+        MATFLOAT hue = ptr_hue[ind1];
+        MATFLOAT delta_src, phs_src;
+
+        if ((ind1 == index_2pi) && (ni > num_hue_pnts_out / 2))
+            hue += gm_2pi;    /* second half of the output: the wrap sample is treated as lying beyond 2*pi */
+
+        while (tar_acc_out >= hue) {    /* advance cyclically until the segment's upper hue exceeds the target */
+            ind0 = (ind0 + 1) % num_hue_pnts_inp;
+            ind1 = (ind1 + 1) % num_hue_pnts_inp;
+            hue = ptr_hue[ind1];
+            if ((ind1 == index_2pi) && (ni > num_hue_pnts_out / 2)) {
+                hue += gm_2pi;
+            }
+            tar_inc_inp = ptr_hue[ind1] - ptr_hue[ind0];
+
+            if (tar_inc_inp < 0.0)
+                tar_inc_inp += gm_2pi;
+        }
+        delta_src = tar_acc_out - ptr_hue[ind0];    /* distance of the target from the lower sample */
+        if (delta_src < 0.0)
+            delta_src += gm_2pi;
+        phs_src = delta_src / tar_inc_inp;    /* NOTE(review): tar_inc_inp is 0 when two consecutive input hues are equal */
+
+        ptr_ic_out[2 * ni + 0] = ptr_ic_inp[2 * ind0 + 0] + (ptr_ic_inp[2 * ind1 + 0] - ptr_ic_inp[2 * ind0 + 0]) * phs_src;
+        ptr_ic_out[2 * ni + 1] = ptr_ic_inp[2 * ind0 + 1] + (ptr_ic_inp[2 * ind1 + 1] - ptr_ic_inp[2 * ind0 + 1]) * phs_src;
+
+        tar_acc_out += tar_inc_out;
+    }
+}
+
+/* calculate hue for primary colors for normalized uniform intensity */
+void gm_genprim_hc(struct s_color_space *ptr_color_space, MATFLOAT *ptr_hr_hc, int num_int_pnts,
+    MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max)
+{
+    MATFLOAT step = 1.0 / (MATFLOAT)(num_int_pnts - 1);
+    MATFLOAT vec_prim_ich[GM_NUM_INT][3];
+    MATFLOAT prim_rgb[3], rgb[3], itp_src[3];
+    int nk, ni, nc;
+
+    for (nk = 0; nk < GM_NUM_PRIM; nk++) {
+        mat_copy((MATFLOAT *)gm_vec_cusp_rgb[nk], prim_rgb, 3);
+        for (ni = 0; ni < num_int_pnts; ni++) {
+            for (nc = 0; nc < 3; nc++)
+                rgb[nc] = prim_rgb[nc] * (MATFLOAT)ni * step;
+            /* generate gamut prim points */
+            cs_gamma_rgb(rgb, rgb, ptr_color_space->gamma_parm, EGD_NONLIN_2_LIN);
+            cs_denorm_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[2]);
+            cs_clamp_rgb(rgb, ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]);
+            cs_rgb_to_itp(ptr_color_space, rgb, itp_src);
+            if ((lum_min > 0.0) || (lum_max < 1.0))
+                itp_src[0] = gm_tm_luma(itp_src[0], ptr_color_space->luma_limits, lum_min, lum_max);
+            cs_itp_to_ich(itp_src, vec_prim_ich[ni]);
+            vec_prim_ich[ni][0] = mat_norm(vec_prim_ich[ni][0], luma_limits[0], luma_limits[2]);
+            /* normilize to [0.0,1.0] from target luma limits */
+            vec_prim_ich[ni][0] = MAT_CLAMP(vec_prim_ich[ni][0], 0.0, 1.0);
+        }
+        /* update Intensity=0.0 point */
+        vec_prim_ich[0][0] = 0.0;
+        vec_prim_ich[0][1] = 0.0;
+        vec_prim_ich[0][2] = vec_prim_ich[1][2];
+        /* update Intensity=1.0 point */
+        vec_prim_ich[num_int_pnts - 1][0] = 1.0;
+        vec_prim_ich[num_int_pnts - 1][1] = 0.0;
+        vec_prim_ich[num_int_pnts - 1][2] = vec_prim_ich[num_int_pnts - 2][2];
+        /* resample to uniform intensity */
+        gm_resample_hc(vec_prim_ich, &ptr_hr_hc[nk * num_int_pnts * 2], num_int_pnts, num_int_pnts);
+    }
+}
+
+/*
+ * Calculate origin1 and origin3 factors for every hue point.
+ *
+ * The per-primary factors are packed as (origin1, origin3) pairs and resampled
+ * over hue into ptr_org13_factor, which receives one pair per hue point.
+ */
+void gm_genorg13_factor(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor)
+{
+    MATFLOAT prim_factor_pairs[GM_NUM_PRIM * 2];
+    MATFLOAT *ptr_pair = prim_factor_pairs;
+    int prim_ind;
+
+    for (prim_ind = 0; prim_ind < GM_NUM_PRIM; prim_ind++) {
+        *ptr_pair++ = ptr_gamut_map->vec_org1_factor[prim_ind];
+        *ptr_pair++ = ptr_gamut_map->vec_org3_factor[prim_ind];
+    }
+
+    gm_resample_hue_ic(&ptr_gamut_map->vec_prim_dst_ich[2 * GM_NUM_PRIM], prim_factor_pairs,
+        ptr_org13_factor, GM_NUM_PRIM, ptr_gamut_map->num_hue_pnts);
+}
+
+/*
+ * Calculate the Origin2 and Origin3 points for one hue index.
+ *
+ * ptr_org13_factor - (origin1, origin3) factor pair per hue point
+ * hue_ind          - hue index; results are written to ptr_org2_ic / ptr_org3_ic
+ *                    of the gamut map at this index
+ */
+void gm_genorigin23_hue(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor, int hue_ind)
+{
+    const int pair_off = 2 * hue_ind;
+    MATFLOAT hue = mat_index_to_flt(hue_ind, ptr_gamut_map->hue_max, ptr_gamut_map->num_hue_pnts);
+    MATFLOAT src_cusp_ich[3], dst_cusp_ich[3];
+    MATFLOAT org_13[2];
+    MATFLOAT org1_factor, org3_factor;
+
+    src_cusp_ich[0] = ptr_gamut_map->ptr_cusp_src_ic[pair_off + 0];
+    src_cusp_ich[1] = ptr_gamut_map->ptr_cusp_src_ic[pair_off + 1];
+    src_cusp_ich[2] = hue;
+
+    dst_cusp_ich[0] = ptr_gamut_map->ptr_cusp_dst_ic[pair_off + 0];
+    dst_cusp_ich[1] = ptr_gamut_map->ptr_cusp_dst_ic[pair_off + 1];
+    dst_cusp_ich[2] = hue;
+
+    /* Org1: a factor >= 1.0 scales org1, a smaller factor moves org1 towards the destination cusp */
+    org1_factor = ptr_org13_factor[pair_off + 0];
+    if (org1_factor >= 1.0)
+        org_13[0] = ptr_gamut_map->org1 * org1_factor;
+    else
+        org_13[0] = ptr_gamut_map->org1 + (dst_cusp_ich[0] - ptr_gamut_map->org1) * org1_factor;
+    org_13[0] = MAT_CLAMP(org_13[0], ptr_gamut_map->org1, dst_cusp_ich[0]);
+
+    /* Org3: org3 scaled by its factor */
+    org3_factor = ptr_org13_factor[pair_off + 1];
+    org_13[1] = ptr_gamut_map->org3 * org3_factor;
+
+    /* calculate Origin2 and Origin3 */
+    gm_getorigin23(&ptr_gamut_map->color_space_src, &ptr_gamut_map->color_space_dst, hue, org_13, ptr_gamut_map->org2_perc_c,
+        src_cusp_ich, dst_cusp_ich, &ptr_gamut_map->ptr_org2_ic[pair_off], &ptr_gamut_map->ptr_org3_ic[pair_off],
+        ptr_gamut_map->mode & GM_PQTAB_GBD);
+}
+
+/*
+ * Calculate the Origin2 and Origin3 points (intensity, chroma) for one hue.
+ *
+ * ptr_color_space_src/dst - source and destination color spaces, used to sample gamut edges
+ * hue                     - hue angle
+ * org_13_factor           - [0] intensity used for Origin2, [1] intensity used for Origin3
+ * org2_perc_c             - upper limit for Origin2 chroma as a fraction of the sampled edge chroma
+ * cusp_ic_src/cusp_ic_dst - source and destination cusp (intensity, chroma)
+ * origin2_ic/origin3_ic   - outputs (intensity, chroma)
+ * en_pq_lut               - forwarded to gm_sample_edge_ic()
+ *
+ * NOTE(review): the divisions below have no guard against a zero denominator
+ * (cusp_ic_dst[0] == origin2_ic[0], or a zero slope) - confirm callers exclude these cases.
+ */
+void gm_getorigin23(struct s_color_space *ptr_color_space_src, struct s_color_space *ptr_color_space_dst,
+    MATFLOAT hue, MATFLOAT org_13_factor[2], MATFLOAT org2_perc_c,
+    MATFLOAT cusp_ic_src[2], MATFLOAT cusp_ic_dst[2],
+    MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int en_pq_lut)
+{
+
+    /* source cusp is not above and outside the destination cusp: Origin2 is placed on
+     * the intensity axis and Origin3 on the line from Origin2 through the destination cusp */
+    if ((cusp_ic_src[0] <= cusp_ic_dst[0]) || (cusp_ic_src[1] <= cusp_ic_dst[1])) {
+        origin2_ic[0] = org_13_factor[0];
+        origin2_ic[1] = 0.0;
+        origin3_ic[0] = org_13_factor[1];
+        origin3_ic[1] = (origin3_ic[0] - origin2_ic[0]) * cusp_ic_dst[1] / (cusp_ic_dst[0] - origin2_ic[0]);
+        return;
+    }
+
+    /* line through both cusps: intensity = slope * chroma + offset */
+    MATFLOAT slope = (cusp_ic_src[0] - cusp_ic_dst[0]) / (cusp_ic_src[1] - cusp_ic_dst[1]);
+    MATFLOAT offset = cusp_ic_dst[0] - slope * cusp_ic_dst[1];
+
+    /* get Origin2 point */
+    origin2_ic[0] = org_13_factor[0];
+    origin2_ic[1] = (origin2_ic[0] - offset) / slope;
+    if (origin2_ic[1] < 0.0) {
+        /* negative chroma: move Origin2 along the line to the intensity axis */
+        origin2_ic[0] = origin2_ic[0] - origin2_ic[1] * slope;
+        origin2_ic[1] = 0.0;
+    } else {
+        MATFLOAT ic_tmp[2];
+        MATFLOAT ic_dst[2] = { origin2_ic[0], origin2_ic[1] };
+        MATFLOAT ic_src[2] = { origin2_ic[0], origin2_ic[1] };
+        MATFLOAT inc_ic[2] = { 0.0, GM_STEP_SAMP * 10.0 };
+        MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) };
+
+        /* sample both gamut edges at Origin2 intensity, stepping along chroma only */
+        gm_sample_edge_ic(ptr_color_space_dst, hue_sin_cos, inc_ic, ic_dst, en_pq_lut);
+        gm_sample_edge_ic(ptr_color_space_src, hue_sin_cos, inc_ic, ic_src, en_pq_lut);
+        /* keep the edge point with the smaller chroma */
+        if (ic_src[1] < ic_dst[1]) {
+            ic_tmp[0] = ic_src[0];
+            ic_tmp[1] = ic_src[1];
+        } else {
+            ic_tmp[0] = ic_dst[0];
+            ic_tmp[1] = ic_dst[1];
+        }
+        /* limit Origin2 chroma and re-derive the line through the source cusp and the new Origin2 */
+        if (origin2_ic[1] > org2_perc_c * ic_tmp[1]) {
+            origin2_ic[1] = org2_perc_c * ic_tmp[1];
+            slope = (cusp_ic_src[0] - origin2_ic[0]) / (cusp_ic_src[1] - origin2_ic[1]);
+            offset = origin2_ic[0] - slope * origin2_ic[1];
+        }
+    }
+    /* get Origin3 point: requested intensity on the current line */
+    origin3_ic[0] = org_13_factor[1];
+    origin3_ic[1] = (origin3_ic[0] - offset) / slope;
+}
+
+/*
+ * Resampling for uniform normalized Intensity in a range [0.0,1.0].
+ *
+ * vec_ich_inp      - input rows; column 0 is the sampling position (intensity),
+ *                    columns 1 and 2 are the values being resampled
+ * ptr_hc_out       - output pairs per point: [0] interpolated from column 2, [1] from column 1
+ * num_int_pnts_inp - number of input rows
+ * num_int_pnts_out - number of output points; point ni is generated for
+ *                    intensity ni / (num_int_pnts_out - 1)
+ */
+void gm_resample_hc(MATFLOAT vec_ich_inp[][3], MATFLOAT *ptr_hc_out, int num_int_pnts_inp, int num_int_pnts_out)
+{
+    MATFLOAT tar_inc_out = 1.0 / (MATFLOAT)(num_int_pnts_out - 1);    /* output intensity step */
+    MATFLOAT tar_inc_inp = vec_ich_inp[1][0] - vec_ich_inp[0][0];     /* intensity length of the input segment */
+    MATFLOAT tar_acc_out = 0.0;                                       /* current output intensity */
+    MATFLOAT phs_inp;
+    int ind0 = 0;
+    int ind1 = 1;
+    int ni;
+
+    for (ni = 0; ni < num_int_pnts_out; ni++) {
+        /* advance the input segment; both indices saturate at the last row, which ends the loop (ind1 == ind0) */
+        while ((tar_acc_out >= vec_ich_inp[ind1][0]) && (ind1 > ind0)) {
+            ind0 = MAT_MIN(ind0 + 1, num_int_pnts_inp - 1);
+            ind1 = MAT_MIN(ind1 + 1, num_int_pnts_inp - 1);
+            tar_inc_inp = vec_ich_inp[ind1][0] - vec_ich_inp[ind0][0];
+        }
+        /* a zero-length segment yields the value of its start row */
+        phs_inp = (tar_inc_inp == 0.0) ? 0.0 : (tar_acc_out - vec_ich_inp[ind0][0]) / tar_inc_inp;
+        ptr_hc_out[ni * 2 + 0] = vec_ich_inp[ind0][2] + (vec_ich_inp[ind1][2] - vec_ich_inp[ind0][2]) * phs_inp;
+        ptr_hc_out[ni * 2 + 1] = vec_ich_inp[ind0][1] + (vec_ich_inp[ind1][1] - vec_ich_inp[ind0][1]) * phs_inp;
+        tar_acc_out += tar_inc_out;
+    }
+}
+
+/*
+ * Map one RGB triplet from the source to the destination color space.
+ * NOTE(review): rgb_inp and rgb_out are presumably both linear-light RGB - confirm.
+ *
+ * Returns the zone reported by gm_map_itp(), or 0 when gamut mapping is
+ * disabled (EGMM_NONE) and the input is copied unchanged.
+ */
+int gm_rgb_to_rgb(struct s_gamut_map* ptr_gamut_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3])
+{
+    MATFLOAT src_itp[3], dst_itp[3];
+    int zone;
+
+    /* mapping disabled: pass through */
+    if (ptr_gamut_map->gamut_map_mode == EGMM_NONE) {
+        mat_copy(rgb_inp, rgb_out, 3);
+        return 0;
+    }
+
+    /* RGB -> ITP, map in ITP, ITP -> RGB */
+    gm_rgb_to_itp(&ptr_gamut_map->color_space_src, rgb_inp, src_itp, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+    zone = gm_map_itp(ptr_gamut_map, src_itp, dst_itp);
+    gm_itp_to_rgb(&ptr_gamut_map->color_space_dst, dst_itp, rgb_out, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+
+    return zone;
+}
+
+/*
+ * Tone map one ITP triplet.
+ * Input and output lumas are in a range [luma_limits[0], luma_limits[1]].
+ *
+ * en_tm_scale_luma  - non-zero: luma scaling (gm_scale_luma), zero: luma correction (gm_tm_luma)
+ * en_tm_scale_color - non-zero: scale both chroma components when luma changed
+ *
+ * Returns the chroma scale that was applied (1.0 when chroma is untouched).
+ */
+MATFLOAT gm_tm_itp(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], MATFLOAT luma_limits[3],
+    MATFLOAT lum_min, MATFLOAT lum_max, int en_tm_scale_color, int en_tm_scale_luma)
+{
+    const MATFLOAT luma_src = itp_inp[0];
+    MATFLOAT chroma_gain = 1.0;
+
+    /* LUMA: scaling or correction as in BT.2390 */
+    itp_out[0] = en_tm_scale_luma ?
+        gm_scale_luma(luma_src, luma_limits, lum_min, lum_max) :
+        gm_tm_luma(luma_src, luma_limits, lum_min, lum_max);
+
+    if (!en_tm_scale_color || (itp_out[0] == luma_src)) {
+        /* chroma is passed through */
+        itp_out[1] = itp_inp[1];
+        itp_out[2] = itp_inp[2];
+        return chroma_gain;
+    }
+
+    /* CHROMA correction as in BT.2390: ratio of the smaller luma to the larger one */
+    if (itp_out[0] < luma_src)
+        chroma_gain = itp_out[0] / luma_src;
+    else
+        chroma_gain = luma_src / itp_out[0];
+    itp_out[1] = itp_inp[1] * chroma_gain;
+    itp_out[2] = itp_inp[2] * chroma_gain;
+
+    return chroma_gain;
+}
+
+/*
+ * Luma tone mapping curve: identity below a knee point, a cubic spline
+ * roll-off towards lum_max above it, followed by a black level lift by lum_min.
+ * Input and output lumas are in a range [luma_limits[0], luma_limits[1]].
+ *
+ * lum_min, lum_max - target limits, normalized to [0.0,1.0]
+ */
+MATFLOAT gm_tm_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max)
+{
+    const MATFLOAT eps = 0.000001;
+    const MATFLOAT knee = (1.5 * lum_max) - 0.5;
+    MATFLOAT luma_norm, luma_knee, luma_lift, luma_out;
+
+    /* Input luma must be normalized to [0.0,1.0] */
+    luma_norm = mat_norm(luma, luma_limits[0], luma_limits[2]);
+    luma_norm = mat_clamp(luma_norm, 0.0, 1.0);
+
+    luma_knee = luma_norm;
+    if (!(luma_norm < knee)) {
+        MATFLOAT t, t2, t3;
+
+        /* position above the knee; the division is skipped when the knee is at 1.0 */
+        if ((1.0 - knee) <= eps)
+            t = (luma_norm - knee);
+        else
+            t = ((luma_norm - knee) / (1.0 - knee));
+        t2 = t * t;
+        t3 = t2 * t;
+
+        luma_knee = (((2.0 * t3) - (3.0 * t2) + 1.0) * knee) + ((t3 - (2.0 * t2) + t) * (1.0 - knee)) + (((-2.0 * t3) + (3.0 * t2)) * lum_max);
+    }
+
+    /* black level lift */
+    luma_lift = luma_knee + lum_min * mat_pow((1.0 - luma_knee), 4.0);
+
+    /* Output luma must be denormalized back to [luma_limits[0], luma_limits[1]] */
+    luma_out = mat_denorm(luma_lift, luma_limits[0], luma_limits[2]);
+    luma_out = mat_clamp(luma_out, luma_limits[0], luma_limits[1]);
+
+    return luma_out;
+}
+
+/*
+ * Linear luma scaling.
+ * Input and output lumas are in a range [luma_limits[0], luma_limits[1]].
+ *
+ * lum_min, lum_max - target limits, normalized to [0.0,1.0]
+ *
+ * NOTE(review): a plain remap of [0.0,1.0] onto [lum_min,lum_max] would be
+ * norm * (lum_max - lum_min) + lum_min; this code subtracts lum_min before
+ * scaling. Behavior is kept as is - confirm that this is intended.
+ */
+MATFLOAT gm_scale_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max)
+{
+    MATFLOAT luma_norm, luma_scaled, luma_out;
+
+    /* Input luma must be normalized to [0.0,1.0] */
+    luma_norm = mat_norm(luma, luma_limits[0], luma_limits[2]);
+    luma_norm = mat_clamp(luma_norm, 0.0, 1.0);
+
+    luma_scaled = (luma_norm - lum_min) * (lum_max - lum_min);
+    luma_scaled = luma_scaled + lum_min;
+
+    /* Output luma must be denormalized back to [luma_limits[0], luma_limits[1]] */
+    luma_out = mat_denorm(luma_scaled, luma_limits[0], luma_limits[2]);
+    luma_out = mat_clamp(luma_out, luma_limits[0], luma_limits[1]);
+
+    return luma_out;
+}
+
+/*
+ * Map one ITP triplet: tone map, then hue rotation, then color (gamut) map.
+ *
+ * Returns the zone reported by the selected color map, or 0 when no color map
+ * is applied (EGMM_TM and any other mode).
+ */
+int gm_map_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3])
+{
+    MATFLOAT tm_itp[3], hr_itp[3];
+
+    /* stage 1: tone map, only when the target limits are narrower than [0.0,1.0] */
+    if ((ptr_gamut_map->lum_min > 0.0) || (ptr_gamut_map->lum_max < 1.0)) {
+        /* chroma scaling is honoured in tone-map-only mode only */
+        gm_tm_itp(itp_inp, tm_itp, ptr_gamut_map->color_space_src.luma_limits,
+            ptr_gamut_map->lum_min, ptr_gamut_map->lum_max,
+            (ptr_gamut_map->gamut_map_mode == EGMM_TM) ? ptr_gamut_map->en_tm_scale_color : 0,
+            (ptr_gamut_map->mode & GM_SCALE_LUMA) ? 1 : 0);
+    } else {
+        mat_copy(itp_inp, tm_itp, 3);
+    }
+
+    /* stage 2: hue rotation (forward direction) */
+    if (ptr_gamut_map->hue_rot_mode == EHRM_NONE)
+        mat_copy(tm_itp, hr_itp, 3);
+    else
+        gm_hr_itp(ptr_gamut_map, tm_itp, hr_itp, 0);
+
+    /* stage 3: color map */
+    switch (ptr_gamut_map->gamut_map_mode) {
+    case EGMM_TM_CHCI:
+        return gm_map_chci_itp(ptr_gamut_map, hr_itp, itp_out);
+    case EGMM_TM_CHSO:
+        return gm_map_chso_itp(ptr_gamut_map, hr_itp, itp_out);
+    case EGMM_TM_CHTO:
+        return gm_map_chto_itp(ptr_gamut_map, hr_itp, itp_out);
+    case EGMM_TM:
+    default:
+        break;
+    }
+
+    mat_copy(hr_itp, itp_out, 3);
+    return 0;
+}
+
+/*
+ * Color map of one ITP triplet using the per-hue Origin2 and Origin3 tables.
+ *
+ * A point that is already valid in the destination color space is copied
+ * unchanged and 0 is returned. Otherwise Origin2 and Origin3 are interpolated
+ * over hue, the zone of the point is determined with gm_get_zone() and the
+ * point is mapped according to ptr_gamut_map->map_type.
+ *
+ * Returns the zone of the input point (0 for a point that needed no mapping).
+ */
+int gm_map_chto_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3])
+{
+    const MATFLOAT gm_2pi = 2.0 * mat_get_pi();
+    int zone;
+    int vec_hue_ind[2];
+    MATFLOAT hue, hue_phs;
+    MATFLOAT origin2_ic[2], origin3_ic[2];
+
+    if (gm_is_valid_itp(&ptr_gamut_map->color_space_dst, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT)) {
+        mat_copy(itp_inp, itp_out, 3);
+        return 0;
+    }
+
+    /* interpolate both origins between the two neighbouring hue points */
+    hue = mat_angle(itp_inp[2], itp_inp[1]);
+    hue_phs = gm_hue_to_index_phase(hue, gm_2pi, ptr_gamut_map->num_hue_pnts, vec_hue_ind);
+    gm_interp_ic(vec_hue_ind, hue_phs, ptr_gamut_map->ptr_org2_ic, origin2_ic);
+    gm_interp_ic(vec_hue_ind, hue_phs, ptr_gamut_map->ptr_org3_ic, origin3_ic);
+
+    zone = gm_get_zone(itp_inp, origin2_ic, origin3_ic, ptr_gamut_map->color_space_dst.luma_limits);
+    if ((ptr_gamut_map->mode & GM_ZONE1_FLEX) && (zone == 1)) {
+        /* correct origin2 for zone 1 to prevent noise bursting for dim content */
+        MATFLOAT int0 = ptr_gamut_map->color_space_dst.luma_limits[0];
+        MATFLOAT int1 = origin2_ic[0];
+        MATFLOAT range_int = int1 - int0;
+        MATFLOAT thresh_int = (int1 + int0) / 2.0;
+        MATFLOAT phase;
+
+        /* an empty range would turn the phase into a division by zero: keep origin2 as is */
+        if (range_int != 0.0) {
+            if (itp_inp[0] < thresh_int) {
+                phase = (itp_inp[0] - int0) / range_int;
+                origin2_ic[0] = itp_inp[0] + (int1 - itp_inp[0]) * phase;
+            } else {
+                phase = (int1 - itp_inp[0]) / range_int;
+                origin2_ic[0] = int1 + (itp_inp[0] - int1) * phase;
+            }
+        }
+    }
+
+    /* the index returned by the mappers is not needed here */
+    switch (ptr_gamut_map->map_type) {
+    case EMT_SEG:
+        (void)gm_map_seg_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, vec_hue_ind, hue_phs);
+        break;
+    case EMT_RAD:
+        (void)gm_map_rad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue);
+        break;
+    case EMT_SEGRAD:
+        (void)gm_map_segrad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue, vec_hue_ind, hue_phs);
+        break;
+    default:
+        mat_copy(itp_inp, itp_out, 3);
+        break;
+    }
+
+    return zone;
+}
+
+/*
+ * Color map of one ITP triplet towards a single origin on the intensity axis.
+ *
+ * A point that is already valid in the destination color space is copied
+ * unchanged and 0 is returned. Otherwise the origin intensity is interpolated
+ * from the Origin2 table over hue, its chroma is forced to 0.0, and the point is
+ * mapped as zone 1 according to ptr_gamut_map->map_type. Origin3 is set to the
+ * input point itself (intensity, chroma).
+ *
+ * Returns 1 for a mapped point, 0 for a point that needed no mapping.
+ */
+int gm_map_chso_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3])
+{
+    const MATFLOAT gm_2pi = 2.0 * mat_get_pi();
+    int zone = 1;
+    int vec_hue_ind[2];
+    MATFLOAT hue, hue_phs;
+    MATFLOAT origin2_ic[2], origin3_ic[2];
+
+    if (gm_is_valid_itp(&ptr_gamut_map->color_space_dst, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT)) {
+        mat_copy(itp_inp, itp_out, 3);
+        return 0;
+    }
+
+    hue = mat_angle(itp_inp[2], itp_inp[1]);
+    hue_phs = gm_hue_to_index_phase(hue, gm_2pi, ptr_gamut_map->num_hue_pnts, vec_hue_ind);
+    gm_interp_ic(vec_hue_ind, hue_phs, ptr_gamut_map->ptr_org2_ic, origin2_ic);
+    origin2_ic[1] = 0.0;
+    origin3_ic[0] = itp_inp[0];
+    origin3_ic[1] = mat_radius(itp_inp[2], itp_inp[1]);    /* chroma */
+
+    /* the index returned by the mappers is not needed here */
+    switch (ptr_gamut_map->map_type) {
+    case EMT_SEG:
+        (void)gm_map_seg_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, vec_hue_ind, hue_phs);
+        break;
+    case EMT_RAD:
+        (void)gm_map_rad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue);
+        break;
+    case EMT_SEGRAD:
+        (void)gm_map_segrad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue, vec_hue_ind, hue_phs);
+        break;
+    default:
+        mat_copy(itp_inp, itp_out, 3);
+        break;
+    }
+
+    return zone;
+}
+
+/*
+ * Color map of one ITP triplet at constant intensity.
+ *
+ * A point that is already valid in the destination color space is copied
+ * unchanged and 0 is returned. Otherwise both origins are placed on the
+ * intensity axis at the input intensity and the point is mapped as zone 1
+ * according to ptr_gamut_map->map_type.
+ *
+ * Returns 1 for a mapped point, 0 for a point that needed no mapping.
+ */
+int gm_map_chci_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3])
+{
+    const MATFLOAT gm_2pi = 2.0 * mat_get_pi();
+    int zone = 1;
+    MATFLOAT origin2_ic[2] = { itp_inp[0], 0.0 };
+    MATFLOAT origin3_ic[2] = { itp_inp[0], 0.0 };
+    int vec_hue_ind[2];
+    MATFLOAT hue, hue_phs;
+
+    if (gm_is_valid_itp(&ptr_gamut_map->color_space_dst, itp_inp, ptr_gamut_map->mode & GM_PQTAB_3DLUT)) {
+        mat_copy(itp_inp, itp_out, 3);
+        return 0;
+    }
+
+    hue = mat_angle(itp_inp[2], itp_inp[1]);
+    hue_phs = gm_hue_to_index_phase(hue, gm_2pi, ptr_gamut_map->num_hue_pnts, vec_hue_ind);
+
+    /* the index returned by the mappers is not needed here */
+    switch (ptr_gamut_map->map_type) {
+    case EMT_SEG:
+        (void)gm_map_seg_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, vec_hue_ind, hue_phs);
+        break;
+    case EMT_RAD:
+        (void)gm_map_rad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue);
+        break;
+    case EMT_SEGRAD:
+        (void)gm_map_segrad_itp(ptr_gamut_map, itp_inp, itp_out, zone, origin2_ic, origin3_ic, hue, vec_hue_ind, hue_phs);
+        break;
+    default:
+        mat_copy(itp_inp, itp_out, 3);
+        break;
+    }
+
+    return zone;
+}
+
+/*
+ * Hue rotation of one ITP triplet: ITP -> ICh, rotate with gm_hr_ich(), ICh -> ITP.
+ * direction : 0 - src to dst (forward), 1 - dst to src (backward)
+ */
+void gm_hr_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int direction)
+{
+    MATFLOAT ich_before[3];
+    MATFLOAT ich_after[3];
+
+    cs_itp_to_ich(itp_inp, ich_before);
+    gm_hr_ich(ptr_gamut_map, ich_before, ich_after, direction);
+    cs_ich_to_itp(ich_after, itp_out);
+}
+
+/*
+ * Hue rotation of one ICh triplet.
+ * direction : 0 - src to dst (forward), 1 - dst to src (backward)
+ *
+ * Intensity is copied. Chroma gain and hue shift are applied only when the
+ * matching bit (GM_CHROMA_GAIN, GM_HUE_SHIFT) is set in hue_rot_mode.
+ */
+void gm_hr_ich(struct s_gamut_map *ptr_gamut_map, MATFLOAT ich_inp[3], MATFLOAT ich_out[3], int direction)
+{
+    MATFLOAT *ptr_from_hc, *ptr_to_hc;
+    MATFLOAT rot_hs_cg[2];
+
+    /* the backward direction swaps the roles of both tables */
+    if (direction) {
+        ptr_from_hc = ptr_gamut_map->ptr_hr_dst_hc;
+        ptr_to_hc = ptr_gamut_map->ptr_hr_src_hc;
+    } else {
+        ptr_from_hc = ptr_gamut_map->ptr_hr_src_hc;
+        ptr_to_hc = ptr_gamut_map->ptr_hr_dst_hc;
+    }
+
+    /* get hue shift and chroma gain parameters */
+    gm_get_hr_parms(ich_inp, ptr_gamut_map->color_space_dst.luma_limits, ptr_from_hc, ptr_to_hc, ptr_gamut_map->num_int_pnts, rot_hs_cg);
+
+    ich_out[0] = ich_inp[0];
+
+    if (ptr_gamut_map->hue_rot_mode & GM_CHROMA_GAIN)
+        ich_out[1] = ich_inp[1] * rot_hs_cg[1];
+    else
+        ich_out[1] = ich_inp[1];
+
+    if (ptr_gamut_map->hue_rot_mode & GM_HUE_SHIFT)
+        ich_out[2] = mat_norm_angle(ich_inp[2] + rot_hs_cg[0]);
+    else
+        ich_out[2] = ich_inp[2];
+}
+
+/*
+ * Get hue shift and chroma gain for one ICh triplet.
+ *
+ * ich           - input triplet: [0] intensity, [2] hue ([1] is not read)
+ * luma_limits   - limits used to normalize the intensity
+ * ptr_hr_src_hc - source table: per primary, num_int_pnts pairs; element 0 of a pair is
+ *                 used as hue and element 1 as chroma
+ * ptr_hr_dst_hc - destination table with the same layout
+ * num_int_pnts  - number of intensity points per primary
+ * rot_hs_cg     - output: [0] hue shift (destination hue - input hue),
+ *                 [1] chroma gain, limited to at most 1.0
+ */
+void gm_get_hr_parms(MATFLOAT ich[3], MATFLOAT luma_limits[3], MATFLOAT *ptr_hr_src_hc,
+        MATFLOAT *ptr_hr_dst_hc, int num_int_pnts, MATFLOAT rot_hs_cg[2])
+{
+    const MATFLOAT gm_2pi = 2.0 * mat_get_pi();
+    MATFLOAT vec_hc_src[2][GM_NUM_PRIM], vec_hc_dst[2][GM_NUM_PRIM];
+    MATFLOAT int_src, hue_src, hue_dst, chroma_src, chroma_dst;
+    int vec_int_ind[2];
+    MATFLOAT int_phs;
+    int vec_hue_ind[2];
+    MATFLOAT hue_phs;
+    int nk, ni;
+
+    hue_src = ich[2];
+    int_src = mat_norm(ich[0], luma_limits[0], luma_limits[2]);    /* normalize to [0.0,1.0] */
+    int_phs = mat_flt_to_index_phase(int_src, 1.0, num_int_pnts, vec_int_ind);
+    /* interpolate both tables over intensity: row 0 - hue, row 1 - chroma, one column per primary */
+    for (nk = 0; nk < GM_NUM_PRIM; nk++) {
+        int ind0 = (nk * num_int_pnts + vec_int_ind[0]) * 2;
+        int ind1 = (nk * num_int_pnts + vec_int_ind[1]) * 2;
+        for (ni = 0; ni < 2; ni++) {
+            vec_hc_src[ni][nk] = ptr_hr_src_hc[ind0 + ni] + (ptr_hr_src_hc[ind1 + ni] - ptr_hr_src_hc[ind0 + ni]) * int_phs;
+            vec_hc_dst[ni][nk] = ptr_hr_dst_hc[ind0 + ni] + (ptr_hr_dst_hc[ind1 + ni] - ptr_hr_dst_hc[ind0 + ni]) * int_phs;
+        }
+    }
+
+    /* NOTE(review): presumably returns the two primaries whose source hues bracket hue_src - confirm */
+    hue_phs = mat_hue_to_index_phase(hue_src, GM_NUM_PRIM, vec_hc_src[0], gm_2pi, 0, vec_hue_ind);
+    if (vec_hue_ind[1] == 0)
+        vec_hc_dst[0][vec_hue_ind[1]] += gm_2pi;    /* correct hue for 2pi crossing */
+
+    /* calculate hue rotation */
+    hue_dst = vec_hc_dst[0][vec_hue_ind[0]] + (vec_hc_dst[0][vec_hue_ind[1]] - vec_hc_dst[0][vec_hue_ind[0]]) * hue_phs;
+    hue_dst = mat_norm_angle(hue_dst);
+    rot_hs_cg[0] = hue_dst - hue_src;
+
+    /* calculate chroma gain; a non-positive source chroma yields a gain of 1.0 */
+    chroma_src = vec_hc_src[1][vec_hue_ind[0]] + (vec_hc_src[1][vec_hue_ind[1]] - vec_hc_src[1][vec_hue_ind[0]]) * hue_phs;
+    chroma_dst = vec_hc_dst[1][vec_hue_ind[0]] + (vec_hc_dst[1][vec_hue_ind[1]] - vec_hc_dst[1][vec_hue_ind[0]]) * hue_phs;
+    rot_hs_cg[1] = (chroma_src > 0.0) ? MAT_MIN(chroma_dst / chroma_src, 1.0) : 1.0;
+}
+
+/*
+ * Segment based mapping of one ITP triplet; dispatches on the zone.
+ *
+ * Zone 1 scans the edge table from the first point to the last one with
+ * Origin2; zones 2 and 3 scan it from the last point to the first one with
+ * Origin2 and Origin3 respectively. Any other zone copies the input.
+ *
+ * Returns the value returned by the zone mapper, or -1 for an unknown zone.
+ */
+int gm_map_seg_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3],
+    int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int vec_hue_ind[2], MATFLOAT hue_phs)
+{
+    if (zone == 1)
+        return gm_map_zone1_seg(itp_inp, itp_out, vec_hue_ind, hue_phs, origin2_ic,
+            ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, 0, ptr_gamut_map->num_edge_pnts - 1);
+
+    if (zone == 2)
+        return gm_map_zone2_seg(itp_inp, itp_out, vec_hue_ind, hue_phs, origin2_ic,
+            ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0);
+
+    if (zone == 3)
+        return gm_map_zone3_seg(itp_inp, itp_out, vec_hue_ind, hue_phs, origin3_ic,
+            ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0);
+
+    /* unknown zone: pass through */
+    mat_copy(itp_inp, itp_out, 3);
+    return -1;
+}
+
+/*
+ * Radial mapping of one ITP triplet; dispatches on the zone.
+ *
+ * Zones 1 and 2 are mapped with Origin2, zone 3 with Origin3. Any other zone
+ * copies the input. Always returns 1.
+ */
+int gm_map_rad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3],
+    int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT hue)
+{
+    struct s_color_space *ptr_cs_dst = &ptr_gamut_map->color_space_dst;
+
+    if (zone == 1) {
+        gm_map_zone1_rad(ptr_cs_dst, itp_inp, itp_out,
+            ptr_gamut_map->step_samp, origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+    } else if (zone == 2) {
+        gm_map_zone2_rad(ptr_cs_dst, itp_inp, itp_out,
+            ptr_gamut_map->step_samp, origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+    } else if (zone == 3) {
+        gm_map_zone3_rad(ptr_cs_dst, itp_inp, itp_out,
+            ptr_gamut_map->step_samp, origin3_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+    } else {
+        /* unknown zone: pass through */
+        mat_copy(itp_inp, itp_out, 3);
+    }
+
+    return 1;
+}
+
+/*
+ * Combined mapping of one ITP triplet: the segment mapper produces an
+ * intermediate point which is then passed through the radial mapper of the
+ * same zone. Any other zone copies the input.
+ *
+ * Returns the value returned by the segment mapper, or -1 for an unknown zone.
+ */
+int gm_map_segrad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3],
+    int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT hue, int vec_hue_ind[2], MATFLOAT hue_phs)
+{
+    struct s_color_space *ptr_cs_dst = &ptr_gamut_map->color_space_dst;
+    MATFLOAT itp_seg[3];
+    int seg_ind;
+
+    if (zone == 1) {
+        seg_ind = gm_map_zone1_seg(itp_inp, itp_seg, vec_hue_ind, hue_phs, origin2_ic,
+            ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, 0, ptr_gamut_map->num_edge_pnts - 1);
+        gm_map_zone1_rad(ptr_cs_dst, itp_seg, itp_out, ptr_gamut_map->step_samp,
+            origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+        return seg_ind;
+    }
+
+    if (zone == 2) {
+        seg_ind = gm_map_zone2_seg(itp_inp, itp_seg, vec_hue_ind, hue_phs, origin2_ic,
+            ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0);
+        gm_map_zone2_rad(ptr_cs_dst, itp_seg, itp_out, ptr_gamut_map->step_samp,
+            origin2_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+        return seg_ind;
+    }
+
+    if (zone == 3) {
+        seg_ind = gm_map_zone3_seg(itp_inp, itp_seg, vec_hue_ind, hue_phs, origin3_ic,
+            ptr_gamut_map->num_edge_pnts, ptr_gamut_map->ptr_edge_ic, ptr_gamut_map->num_edge_pnts - 1, 0);
+        gm_map_zone3_rad(ptr_cs_dst, itp_seg, itp_out, ptr_gamut_map->step_samp,
+            origin3_ic, hue, ptr_gamut_map->mode & GM_PQTAB_3DLUT);
+        return seg_ind;
+    }
+
+    /* unknown zone: pass through */
+    mat_copy(itp_inp, itp_out, 3);
+    return -1;
+}
+
+/*
+ * Convert a hue angle to a pair of neighbouring hue table indexes and the
+ * interpolation phase between them.
+ *
+ * hue          - hue angle
+ * hue_max      - hue range covered by the table
+ * num_hue_pnts - number of table points, spaced hue_max / num_hue_pnts apart
+ * vec_hue_ind  - output: [0] lower index, [1] next index, wrapped to 0 after the last point
+ *
+ * Returns the phase (hue - hue of the lower index) / hue step.
+ *
+ * The lower index is clamped to [0, num_hue_pnts - 1]: a hue equal to hue_max
+ * (or rounded up to it) then interpolates between the last point and point 0
+ * with a phase of about 1.0 instead of indexing past the end of the table.
+ * A table with fewer than two points always yields index 0 and phase 0.0.
+ */
+MATFLOAT gm_hue_to_index_phase(MATFLOAT hue, MATFLOAT hue_max, int num_hue_pnts, int vec_hue_ind[2])
+{
+    MATFLOAT hue_step, hue_max_ind, tmp;
+    int ind;
+
+    /* degenerate table: avoids 0/0 below and the modulo by zero */
+    if (num_hue_pnts < 2) {
+        vec_hue_ind[0] = 0;
+        vec_hue_ind[1] = 0;
+        return 0.0;
+    }
+
+    hue_step = hue_max / (MATFLOAT)num_hue_pnts;
+    hue_max_ind = hue_step * (MATFLOAT)(num_hue_pnts - 1);
+    tmp = (MATFLOAT)(num_hue_pnts - 1) / hue_max_ind;    /* indexes per hue unit */
+
+    ind = (int)(hue * tmp);
+    if (ind < 0)
+        ind = 0;
+    if (ind > num_hue_pnts - 1)
+        ind = num_hue_pnts - 1;
+
+    vec_hue_ind[0] = ind;
+    vec_hue_ind[1] = (ind + 1) % num_hue_pnts;
+
+    return (hue - (MATFLOAT)ind / tmp) / hue_step;
+}
+
+/*
+ * Linear interpolation of a pair of values between two hue points.
+ *
+ * vec_hue_ind - indexes of the two hue points
+ * hue_phs     - interpolation phase between them
+ * vec_pnt_ic  - table with two values per hue point
+ * pnt_ic      - output pair
+ */
+void gm_interp_ic(int vec_hue_ind[2], MATFLOAT hue_phs, MATFLOAT vec_pnt_ic[], MATFLOAT pnt_ic[2])
+{
+    const int base_lo = vec_hue_ind[0] << 1;
+    const int base_hi = vec_hue_ind[1] << 1;
+    int comp;
+
+    for (comp = 0; comp < 2; comp++)
+        pnt_ic[comp] = vec_pnt_ic[base_lo + comp] + (vec_pnt_ic[base_hi + comp] - vec_pnt_ic[base_lo + comp]) * hue_phs;
+}
+
+/*
+ * Get one edge point, linearly interpolated between two hue points.
+ *
+ * vec_hue_ind   - indexes of the two hue points
+ * hue_phs       - interpolation phase between them
+ * ind_seg       - index of the edge point inside a hue
+ * num_edge_pnts - number of edge points per hue
+ * ptr_edge_ic   - edge table, num_edge_pnts pairs per hue
+ * pnt_ic        - output pair
+ */
+void gm_getseg_ic(int vec_hue_ind[2], MATFLOAT hue_phs, int ind_seg, int num_edge_pnts,
+    MATFLOAT *ptr_edge_ic, MATFLOAT pnt_ic[2])
+{
+    const int base_lo = (vec_hue_ind[0] * num_edge_pnts + ind_seg) << 1;
+    const int base_hi = (vec_hue_ind[1] * num_edge_pnts + ind_seg) << 1;
+    /* all four values are read before the output is written */
+    const MATFLOAT lo_first = ptr_edge_ic[base_lo + 0];
+    const MATFLOAT lo_second = ptr_edge_ic[base_lo + 1];
+    const MATFLOAT hi_first = ptr_edge_ic[base_hi + 0];
+    const MATFLOAT hi_second = ptr_edge_ic[base_hi + 1];
+
+    pnt_ic[0] = lo_first + (hi_first - lo_first) * hue_phs;
+    pnt_ic[1] = lo_second + (hi_second - lo_second) * hue_phs;
+}
+
+/*
+ * Generate the gamut edge for one hue; selects the generator by edge type.
+ *
+ * EET_CHROMA selects chroma for constant intensity (gm_genedge_int); any other
+ * type selects intensity and chroma for constant elevation angle (gm_genedge_rad).
+ */
+void gm_genedge(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], int num_edge_pnts,
+        enum gm_edge_type edge_type, MATFLOAT step_samp, MATFLOAT hue, MATFLOAT *ptr_edge_ic, int en_pq_lut)
+{
+    if (edge_type != EET_CHROMA) {
+        /* intensity and chroma for constant elevation angle */
+        gm_genedge_rad(ptr_color_space, luma_limits, num_edge_pnts, hue, step_samp, ptr_edge_ic, en_pq_lut);
+        return;
+    }
+
+    /* chroma for constant intensity */
+    gm_genedge_int(ptr_color_space, luma_limits, num_edge_pnts, hue, step_samp, ptr_edge_ic, en_pq_lut);
+}
+
+/*
+ * Generate the gamut edge for one hue at uniformly spaced intensities.
+ *
+ * luma_limits   - [0] intensity of the first point, [1] intensity of the last point,
+ *                 [2] divided by (num_edge_pnts - 1) gives the intensity step
+ * num_edge_pnts - number of edge points
+ * step_samp     - chroma step used while searching for the edge
+ * ptr_edge_ic   - output, num_edge_pnts (intensity, chroma) pairs
+ *
+ * The first and the last point get zero chroma. For every point in between
+ * the search starts at a chroma predicted linearly from the two previous points.
+ */
+void gm_genedge_int(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], int num_edge_pnts,
+        MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic, int en_pq_lut)
+{
+    MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) };
+    const int pnt_last = num_edge_pnts - 1;
+    const MATFLOAT int_step = luma_limits[2] / (MATFLOAT)(num_edge_pnts - 1);
+    MATFLOAT search_inc_ic[2] = { 0.0, step_samp };
+    MATFLOAT cur_ic[2] = { luma_limits[0], 0.0 };
+    MATFLOAT chroma_older = cur_ic[1];
+    MATFLOAT chroma_newer = cur_ic[1];
+    int pnt;
+
+    /* first point */
+    ptr_edge_ic[0] = cur_ic[0];
+    ptr_edge_ic[1] = cur_ic[1];
+
+    for (pnt = 1; pnt < pnt_last; pnt++) {
+        cur_ic[0] += int_step;
+        /* linear predictor, never below zero chroma */
+        cur_ic[1] = 2.0 * chroma_newer - chroma_older;
+        cur_ic[1] = MAT_MAX(cur_ic[1], 0.0);
+
+        gm_sample_edge_ic(ptr_color_space, hue_sin_cos, search_inc_ic, cur_ic, en_pq_lut);
+
+        chroma_older = chroma_newer;
+        chroma_newer = cur_ic[1];
+        ptr_edge_ic[pnt * 2 + 0] = cur_ic[0];
+        ptr_edge_ic[pnt * 2 + 1] = cur_ic[1];
+    }
+
+    /* last point */
+    ptr_edge_ic[pnt_last * 2 + 0] = luma_limits[1];
+    ptr_edge_ic[pnt_last * 2 + 1] = 0.0;
+}
+
+/*
+ * Generate the gamut edge for one hue at uniformly spaced elevation angles
+ * around an origin on the intensity axis.
+ *
+ * luma_limits   - [0] intensity of the first point, [1] intensity of the last point
+ * num_edge_pnts - number of edge points; the angle step is pi / (num_edge_pnts - 1)
+ * step_samp     - step length used while searching for the edge along the ray
+ * ptr_edge_ic   - output, num_edge_pnts (intensity, chroma) pairs
+ *
+ * The first and the last point get zero chroma. The origin intensity is
+ * GM_EDGE_ORG denormalized with the luma limits of the color space.
+ */
+void gm_genedge_rad(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3], int num_edge_pnts,
+    MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic, int en_pq_lut)
+{
+    const MATFLOAT gm_pi = mat_get_pi();
+    MATFLOAT hue_sin_cos[2] = { mat_sin(hue), mat_cos(hue) };
+    MATFLOAT step_angle = gm_pi / (MATFLOAT)(num_edge_pnts - 1);
+    MATFLOAT vec_org[2] = { mat_denorm(GM_EDGE_ORG, ptr_color_space->luma_limits[0], ptr_color_space->luma_limits[2]), 0.0 };
+    MATFLOAT angle = step_angle;
+    /* initial radius: distance from the origin to the first edge point */
+    MATFLOAT radius = vec_org[0] - luma_limits[0];
+    MATFLOAT vec_radius_prev[2] = { radius, radius };
+    int np;
+
+    ptr_edge_ic[0] = luma_limits[0];
+    ptr_edge_ic[1] = 0.0;
+    for (np = 1; np < num_edge_pnts - 1; np++) {
+        MATFLOAT ang_sin_cos[2] = { mat_sin(angle), mat_cos(angle) };
+        /* search step along the ray of the current angle */
+        MATFLOAT inc_ic[2] = {-step_samp * ang_sin_cos[1], step_samp * ang_sin_cos[0] };
+        MATFLOAT pnt_ic[2];
+
+        if (np > 1)
+            radius = 2.0 * vec_radius_prev[1] - vec_radius_prev[0];    /* linear predictor */
+        /* start point of the search: predicted radius on the current ray */
+        pnt_ic[0] = vec_org[0] - radius * ang_sin_cos[1];
+        pnt_ic[1] = radius * ang_sin_cos[0];
+        gm_sample_edge_ic(ptr_color_space, hue_sin_cos, inc_ic, pnt_ic, en_pq_lut);
+        /* remember the radius that was found for the next prediction */
+        vec_radius_prev[0] = vec_radius_prev[1];
+        vec_radius_prev[1] = mat_radius(vec_org[0] - pnt_ic[0], pnt_ic[1]);
+        ptr_edge_ic[np * 2 + 0] = pnt_ic[0];
+        ptr_edge_ic[np * 2 + 1] = pnt_ic[1];
+        angle += step_angle;
+    }
+    ptr_edge_ic[(num_edge_pnts - 1) * 2 + 0] = luma_limits[1];
+    ptr_edge_ic[(num_edge_pnts - 1) * 2 + 1] = 0.0;
+}
+
+/*
+ * Insert the cusp into the edge of one hue.
+ *
+ * ptr_edge_ic   - num_edge_pnts (intensity, chroma) pairs, modified in place
+ * num_edge_pnts - number of edge points
+ * cusp_ic       - cusp (intensity, chroma)
+ *
+ * Starting at a quarter of the edge, the first point with an intensity at or
+ * above the cusp intensity is located. Of this point and its predecessor, the
+ * one closer in intensity to the cusp is replaced by the cusp. The edge is left
+ * unchanged when no such point exists.
+ *
+ * The scan never starts before the second point, so that the predecessor is
+ * always inside the edge (matters only for fewer than four edge points).
+ */
+void gm_edgecusp_adjust(MATFLOAT *ptr_edge_ic, int num_edge_pnts, MATFLOAT cusp_ic[2])
+{
+    int ind0, ind1;
+    MATFLOAT delta0, delta1;
+
+    ind1 = 2 * (num_edge_pnts >> 2);
+    if (ind1 < 2)
+        ind1 = 2;
+
+    for (; ind1 < 2 * num_edge_pnts; ind1 += 2) {
+        if (ptr_edge_ic[ind1] >= cusp_ic[0]) {
+            ind0 = ind1 - 2;
+            delta1 = ptr_edge_ic[ind1] - cusp_ic[0];
+            delta0 = cusp_ic[0] - ptr_edge_ic[ind0];
+            if (delta0 < delta1) {
+                ptr_edge_ic[ind0] = cusp_ic[0];
+                ptr_edge_ic[ind0 + 1] = cusp_ic[1];
+            } else {
+                ptr_edge_ic[ind1] = cusp_ic[0];
+                ptr_edge_ic[ind1 + 1] = cusp_ic[1];
+            }
+            break;
+        }
+    }
+}
+
+/*
+ * Move a point along a fixed direction until it sits at the boundary reported
+ * by gm_is_valid_ic().
+ *
+ * hue_sin_cos - sine and cosine of the hue, forwarded to gm_is_valid_ic()
+ * inc_ic      - step (intensity, chroma) added per iteration
+ * pnt_ic      - in: start point, out: point that was found
+ * en_pq_lut   - forwarded to gm_is_valid_ic()
+ *
+ * A valid start point is moved by +inc_ic while it stays valid; the last valid
+ * position is returned. An invalid start point is moved by -inc_ic until it
+ * becomes valid or its chroma reaches 0.0.
+ */
+void gm_sample_edge_ic(struct s_color_space *ptr_color_space, MATFLOAT hue_sin_cos[2],
+    MATFLOAT inc_ic[2], MATFLOAT pnt_ic[2], int en_pq_lut)
+{
+    if (gm_is_valid_ic(ptr_color_space, pnt_ic, hue_sin_cos, en_pq_lut)) {
+        do {
+            pnt_ic[0] += inc_ic[0];
+            pnt_ic[1] += inc_ic[1];
+        } while (gm_is_valid_ic(ptr_color_space, pnt_ic, hue_sin_cos, en_pq_lut));
+        /* step back from the first invalid position */
+        pnt_ic[0] -= inc_ic[0];
+        pnt_ic[1] -= inc_ic[1];
+    } else {
+        do {
+            pnt_ic[0] -= inc_ic[0];
+            pnt_ic[1] -= inc_ic[1];
+            pnt_ic[1] = MAT_MAX(pnt_ic[1], 0.0); /* for zone 3 */
+        } while (!gm_is_valid_ic(ptr_color_space, pnt_ic, hue_sin_cos, en_pq_lut) && (pnt_ic[1] > 0.0));
+    }
+}
+
+
+/*
+ * Get the mapping zone of one ITP triplet.
+ *
+ * Returns 1 when the intensity is below the Origin2 intensity, 2 when it is
+ * below the line through Origin2 and Origin3 at the chroma of the point, and
+ * 3 otherwise. luma_limits is not used.
+ */
+int gm_get_zone(MATFLOAT itp[3], MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT luma_limits[3])
+{
+    MATFLOAT chroma = mat_radius(itp[2], itp[1]);
+    MATFLOAT line_slope, line_offset;
+    int zone = 3;
+
+    if (itp[0] < origin2_ic[0]) {
+        zone = 1;
+    } else {
+        /* line through both origins: intensity = slope * chroma + offset */
+        line_slope = (origin3_ic[0] - origin2_ic[0]) / (origin3_ic[1] - origin2_ic[1]);
+        line_offset = origin2_ic[0] - line_slope * origin2_ic[1];
+
+        if (itp[0] < line_slope * chroma + line_offset)
+            zone = 2;
+    }
+
+    return zone;
+}
+
+/*
+ * Zone 1 segment mapping: intersect the line from the input point to the point
+ * (Origin2 intensity, zero chroma) with the edge segments of the current hue.
+ *
+ * pnt_fst, pnt_lst - first and last edge point to scan; the scan runs downwards
+ *                    when pnt_fst is not below pnt_lst
+ *
+ * Returns the index of the edge point that ends the intersected segment. When
+ * no segment is intersected the input is copied and -1 is returned.
+ */
+int gm_map_zone1_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], MATFLOAT hue_phs,
+    MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst)
+{
+    const int step = (pnt_fst < pnt_lst) ? 1 : -1;
+    const int num_segs = (pnt_fst < pnt_lst) ? (pnt_lst - pnt_fst) : (pnt_fst - pnt_lst);
+    MATFLOAT inp_ich[3], org_ich[3];
+    MATFLOAT hit_ich[3];
+    MATFLOAT seg_beg_ic[2], seg_end_ic[2];
+    MATFLOAT dir_ic[2];
+    int seg, np;
+
+    cs_itp_to_ich(itp_inp, inp_ich);
+    org_ich[0] = origin2_ic[0];
+    org_ich[1] = 0.0;
+    org_ich[2] = inp_ich[2];
+    dir_ic[0] = org_ich[0] - inp_ich[0];
+    dir_ic[1] = org_ich[1] - inp_ich[1];
+
+    gm_getseg_ic(vec_hue_ind, hue_phs, pnt_fst, num_edge_pnts, ptr_edge_ic, seg_beg_ic);
+
+    np = pnt_fst + step;
+    for (seg = 0; seg < num_segs; seg++) {
+        gm_getseg_ic(vec_hue_ind, hue_phs, np, num_edge_pnts, ptr_edge_ic, seg_end_ic);
+        if (gm_seg_intersection(inp_ich, org_ich, dir_ic, seg_beg_ic, seg_end_ic, hit_ich)) {
+            /* the hue of the input point is kept */
+            hit_ich[2] = inp_ich[2];
+            cs_ich_to_itp(hit_ich, itp_out);
+            return np;
+        }
+        /* the end of this segment is the start of the next one */
+        mat_copy(seg_end_ic, seg_beg_ic, 2);
+        np += step;
+    }
+
+    mat_copy(itp_inp, itp_out, 3); /* Should not happen */
+
+    return -1;
+}
+
+/*
+ * Zone 2 segment mapping: intersect the line from the input point to Origin2
+ * (intensity and chroma) with the edge segments of the current hue.
+ *
+ * pnt_fst, pnt_lst - first and last edge point to scan; the scan runs downwards
+ *                    when pnt_fst is not below pnt_lst
+ *
+ * Returns the index of the edge point that ends the intersected segment. When
+ * no segment is intersected the input is copied and -1 is returned.
+ */
+int gm_map_zone2_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], MATFLOAT hue_phs,
+    MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst)
+{
+    const int step = (pnt_fst < pnt_lst) ? 1 : -1;
+    const int num_segs = (pnt_fst < pnt_lst) ? (pnt_lst - pnt_fst) : (pnt_fst - pnt_lst);
+    MATFLOAT inp_ich[3], org_ich[3];
+    MATFLOAT hit_ich[3];
+    MATFLOAT seg_beg_ic[2], seg_end_ic[2];
+    MATFLOAT dir_ic[2];
+    int seg, np;
+
+    cs_itp_to_ich(itp_inp, inp_ich);
+    org_ich[0] = origin2_ic[0];
+    org_ich[1] = origin2_ic[1];
+    org_ich[2] = inp_ich[2];
+    dir_ic[0] = org_ich[0] - inp_ich[0];
+    dir_ic[1] = org_ich[1] - inp_ich[1];
+
+    gm_getseg_ic(vec_hue_ind, hue_phs, pnt_fst, num_edge_pnts, ptr_edge_ic, seg_beg_ic);
+
+    np = pnt_fst + step;
+    for (seg = 0; seg < num_segs; seg++) {
+        gm_getseg_ic(vec_hue_ind, hue_phs, np, num_edge_pnts, ptr_edge_ic, seg_end_ic);
+        if (gm_seg_intersection(inp_ich, org_ich, dir_ic, seg_beg_ic, seg_end_ic, hit_ich)) {
+            /* the hue of the input point is kept */
+            hit_ich[2] = inp_ich[2];
+            cs_ich_to_itp(hit_ich, itp_out);
+            return np;
+        }
+        /* the end of this segment is the start of the next one */
+        mat_copy(seg_end_ic, seg_beg_ic, 2);
+        np += step;
+    }
+
+    mat_copy(itp_inp, itp_out, 3); /* Should not happen */
+
+    return -1;
+}
+
+int gm_map_zone3_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2], MATFLOAT hue_phs,
+    MATFLOAT origin3_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst)
+{   /* Zone 3: move the pixel to the edge along the line through the pixel and origin3; -1 if no crossing */
+    const int step = (pnt_fst < pnt_lst) ? 1 : -1;
+    const int num_seg = (pnt_lst - pnt_fst) * step;    /* edge segments between pnt_fst and pnt_lst */
+    const int pnt_top = num_edge_pnts - 1;             /* index of the last edge point */
+    MATFLOAT src_ich[3], org_ich[3], hit_ich[3];
+    MATFLOAT seg_beg_ic[2], seg_end_ic[2], dir_ic[2];
+    MATFLOAT grad;
+    int pnt = pnt_fst, ns;
+
+    /* slide the pixel along its line to origin3 down to zero chroma: the tested segment starts on the intensity axis */
+    cs_itp_to_ich(itp_inp, src_ich);
+    grad = (origin3_ic[0] - src_ich[0]) / (origin3_ic[1] - src_ich[1]);
+    src_ich[0] = src_ich[0] - grad * src_ich[1];
+    src_ich[1] = 0.0;
+
+    org_ich[0] = origin3_ic[0];
+    org_ich[1] = origin3_ic[1];
+    org_ich[2] = src_ich[2];
+    dir_ic[0] = org_ich[0] - src_ich[0];
+    dir_ic[1] = org_ich[1] - src_ich[1];
+
+    /* prevent non-intersection for the last segment */
+    gm_getseg_ic(vec_hue_ind, hue_phs, pnt_top, num_edge_pnts, ptr_edge_ic, seg_beg_ic);
+    if (src_ich[0] >= seg_beg_ic[0]) {
+        itp_out[0] = seg_beg_ic[0];
+        itp_out[1] = 0.0;
+        itp_out[2] = 0.0;
+        return pnt_top;
+    }
+
+    if (pnt_fst != pnt_top)
+        gm_getseg_ic(vec_hue_ind, hue_phs, pnt_fst, num_edge_pnts, ptr_edge_ic, seg_beg_ic);
+
+    for (ns = 0; ns < num_seg; ns++) {
+        pnt += step;
+        gm_getseg_ic(vec_hue_ind, hue_phs, pnt, num_edge_pnts, ptr_edge_ic, seg_end_ic);
+        if (!gm_seg_intersection(src_ich, org_ich, dir_ic, seg_beg_ic, seg_end_ic, hit_ich)) {
+            mat_copy(seg_end_ic, seg_beg_ic, 2);    /* the end of this segment starts the next one */
+            continue;
+        }
+        hit_ich[2] = src_ich[2];    /* third ich component is taken over from the input pixel */
+        cs_ich_to_itp(hit_ich, itp_out);
+        return pnt;
+    }
+
+    mat_copy(itp_inp, itp_out, 3);    /* Should not happen */
+    return -1;
+}
+
+void gm_map_zone1_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT itp_out[3],
+    MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut)
+{   /* Zone 1, sampling variant: direction is derived from the pixel chroma and its intensity gap to origin2 */
+    MATFLOAT sin_cos[2] = { mat_sin(hue), mat_cos(hue) };
+    MATFLOAT loc_ic[2] = { itp_inp[0], mat_radius(itp_inp[2], itp_inp[1]) };
+    MATFLOAT dir = mat_angle(loc_ic[1], origin2_ic[0] - itp_inp[0]);
+    MATFLOAT step_ic[2] = { -step_samp * mat_cos(dir), step_samp * mat_sin(dir) };
+
+    /* loc_ic is both input and result of the edge sampling; step_ic has length step_samp */
+    gm_sample_edge_ic(ptr_color_space, sin_cos, step_ic, loc_ic, en_pq_lut);
+
+    /* back to ITP on the requested hue: chroma * cos(hue), chroma * sin(hue) */
+    itp_out[0] = loc_ic[0];
+    itp_out[1] = loc_ic[1] * sin_cos[1];
+    itp_out[2] = loc_ic[1] * sin_cos[0];
+}
+
+void gm_map_zone2_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT itp_out[3],
+    MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut)
+{   /* Zone 2, sampling variant: direction is derived from the pixel offset to origin2 (intensity and chroma) */
+    MATFLOAT sin_cos[2] = { mat_sin(hue), mat_cos(hue) };
+    MATFLOAT loc_ic[2] = { itp_inp[0], mat_radius(itp_inp[2], itp_inp[1]) };
+    MATFLOAT dir = mat_angle(itp_inp[0] - origin2_ic[0], loc_ic[1] - origin2_ic[1]);
+    MATFLOAT step_ic[2] = { step_samp * mat_sin(dir), step_samp * mat_cos(dir) };
+
+    /* loc_ic is both input and result of the edge sampling; step_ic has length step_samp */
+    gm_sample_edge_ic(ptr_color_space, sin_cos, step_ic, loc_ic, en_pq_lut);
+
+    /* back to ITP on the requested hue: chroma * cos(hue), chroma * sin(hue) */
+    itp_out[0] = loc_ic[0];
+    itp_out[1] = loc_ic[1] * sin_cos[1];
+    itp_out[2] = loc_ic[1] * sin_cos[0];
+}
+
+void gm_map_zone3_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT itp_out[3],
+    MATFLOAT step_samp, MATFLOAT origin3_ic[2], MATFLOAT hue, int en_pq_lut)
+{   /* Zone 3, sampling variant: direction is derived from the offset of origin3 to the pixel */
+    MATFLOAT sin_cos[2] = { mat_sin(hue), mat_cos(hue) };
+    MATFLOAT loc_ic[2] = { itp_inp[0], mat_radius(itp_inp[2], itp_inp[1]) };
+    MATFLOAT dir = mat_angle(origin3_ic[0] - itp_inp[0], origin3_ic[1] - loc_ic[1]);
+    MATFLOAT step_ic[2] = { step_samp * mat_sin(dir), step_samp * mat_cos(dir) };
+
+    /* loc_ic is both input and result of the edge sampling; step_ic has length step_samp */
+    gm_sample_edge_ic(ptr_color_space, sin_cos, step_ic, loc_ic, en_pq_lut);
+
+    /* back to ITP on the requested hue: chroma * cos(hue), chroma * sin(hue) */
+    itp_out[0] = loc_ic[0];
+    itp_out[1] = loc_ic[1] * sin_cos[1];
+    itp_out[2] = loc_ic[1] * sin_cos[0];
+}
+
+void gm_show_pix(int zone, MATFLOAT itp_src[3], MATFLOAT itp_dst[3], MATFLOAT rgb[3],
+    enum gm_show_pix_mode show_pix_mode, MATFLOAT hue_limits[2])
+{   /* Debugging aid: rgb of the pixels NOT selected by show_pix_mode is replaced by mid grey (0.5) */
+    /* ESPM_HUEOUT tests the hue of the mapped pixel (itp_dst), every other mode that of the source pixel */
+    MATFLOAT *itp_hue = (show_pix_mode == ESPM_HUEOUT) ? itp_dst : itp_src;
+    MATFLOAT hue = mat_angle(itp_hue[2], itp_hue[1]);
+
+    switch (show_pix_mode) {
+    case ESPM_NOMAP:
+        if (zone != 0)
+            mat_set(0.5, rgb, 3);
+        break;
+    case ESPM_MAP:
+        if (zone == 0)
+            mat_set(0.5, rgb, 3);
+        break;
+    case ESPM_MAPZ1:
+        if (zone != 1)
+            mat_set(0.5, rgb, 3);
+        break;
+    case ESPM_MAPZ2:
+        if (zone != 2)
+            mat_set(0.5, rgb, 3);
+        break;
+    case ESPM_MAPZ3:
+        if (zone != 3)
+            mat_set(0.5, rgb, 3);
+        break;
+    case ESPM_NUMZ:
+        /* every pixel shows its zone number as a grey level: zone / 3 */
+        mat_set((MATFLOAT)zone / 3.0, rgb, 3);
+        break;
+    case ESPM_HUEINP:
+    case ESPM_HUEOUT:
+        if ((hue < hue_limits[0]) || (hue > hue_limits[1]))
+            mat_set(0.5, rgb, 3);
+        break;
+    default:
+        break;
+    }
+}
+
+void gm_gen_3dlut(struct s_gamut_map* ptr_gamut_map, int num_pnts, int bitwidth,
+    int en_merge, unsigned short* ptr_3dlut_rgb)
+{   /* Write the gamut mapped num_pnts^3 RGB LUT (R index slowest, B fastest, codes 0..2^bitwidth-1) */
+    int val_max = (1 << bitwidth) - 1;
+    int val_div = (num_pnts > 1) ? num_pnts - 1 : 1;    /* a single point grid must not divide by zero */
+    int index = 0, nir, nig, nib, nc;
+    MATFLOAT rgb_src[3], rgb_dst[3], rgb_src_lin[3], rgb_dst_lin[3];
+
+    #ifdef GM_SIM
+    #pragma omp parallel for private(index, nig, nib, nc, rgb_src, rgb_dst, rgb_src_lin, rgb_dst_lin)
+    #endif
+    for (nir = 0; nir < num_pnts; nir++) {
+        index = num_pnts * num_pnts * nir * 3;
+        /* uniform grid input: each component depends on its own index only, so it is linearized once per level */
+        rgb_src[0] = mat_int2flt((unsigned short)((nir * val_max) / val_div), val_max);
+        rgb_src_lin[0] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[0]);
+        for (nig = 0; nig < num_pnts; nig++) {
+            rgb_src[1] = mat_int2flt((unsigned short)((nig * val_max) / val_div), val_max);
+            rgb_src_lin[1] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[1]);
+            for (nib = 0; nib < num_pnts; nib++) {
+                rgb_src[2] = mat_int2flt((unsigned short)((nib * val_max) / val_div), val_max);
+                rgb_src_lin[2] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[2]);
+                /* merge: the input is the colour already stored in the entry being replaced. All three
+                 * components are read from that very entry, not from the first entry of its slab or row */
+                for (nc = 0; en_merge && (nc < 3); nc++) {
+                    rgb_src[nc] = mat_int2flt(ptr_3dlut_rgb[index + nc], val_max);
+                    rgb_src_lin[nc] = cs_nlin_to_lin(&ptr_gamut_map->color_space_src, rgb_src[nc]);
+                }
+                gm_rgb_to_rgb(ptr_gamut_map, rgb_src_lin, rgb_dst_lin);
+                cs_lin_to_nlin_rgb(&ptr_gamut_map->color_space_dst, rgb_dst_lin, rgb_dst);
+                cs_flt2short_rgb(rgb_dst, &ptr_3dlut_rgb[index], val_max);
+                index += 3;
+            }
+        }
+    }
+}
+
+void gm_gen_map(struct s_gamut_map* ptr_gamut_map, int update_msk)
+{   /* (Re)generate the per hue slice data of the gamut map for the parts selected by update_msk */
+    int nh;
+
+    if ((ptr_gamut_map->gamut_map_mode == EGMM_TM_CHTO) && (update_msk & (GM_UPDATE_SRC | GM_UPDATE_DST))) {
+        MATFLOAT* ptr_org13_factor = (MATFLOAT*)ptr_gamut_map->ptr_func_alloc(
+            ptr_gamut_map->num_hue_pnts * 2 * sizeof(MATFLOAT), ptr_gamut_map->memory_context);
+
+        /* allocation failed: nothing can be generated, leave the map as it is instead of writing through NULL */
+        if (!ptr_org13_factor)
+            return;
+
+        gm_genorg13_factor(ptr_gamut_map, ptr_org13_factor);
+        #ifdef GM_SIM
+        #pragma omp parallel for num_threads(10)
+        #endif
+        for (nh = 0; nh < ptr_gamut_map->num_hue_pnts; nh++) {
+            /* generate origin 2 and 3 points per hue slice */
+            gm_genorigin23_hue(ptr_gamut_map, ptr_org13_factor, nh);
+        }
+        ptr_gamut_map->ptr_func_free(ptr_org13_factor, ptr_gamut_map->memory_context);
+    }
+
+    if ((ptr_gamut_map->gamut_map_mode > EGMM_TM) && (ptr_gamut_map->map_type != EMT_RAD) && (update_msk & GM_UPDATE_DST)) {
+        #ifdef GM_SIM
+        #pragma omp parallel for num_threads(10)
+        #endif
+        for (nh = 0; nh < ptr_gamut_map->num_hue_pnts; nh++) {
+            /* generate GBD per hue slice */
+            gm_gen_edge_hue(ptr_gamut_map, nh);
+        }
+    }
+}
+
+void gm_rgb_to_itp(struct s_color_space* ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3], int en_pq_lut)
+{   /* RGB -> LMS -> PQ non-linear LMS -> ITP; itp_out may be the same array as rgb_inp (LMS is kept in a local) */
+    MATFLOAT vec_lms[3];
+    int ch;
+
+    mat_eval_3x3(ptr_color_space->mat_rgb2lms, rgb_inp, vec_lms);
+    /* en_pq_lut selects the table based PQ curve instead of the computed one */
+    for (ch = 0; ch < 3; ch++)
+        vec_lms[ch] = en_pq_lut ? gm_pq_lut(vec_lms[ch], EGD_LIN_2_NONLIN) : cs_gamma_pq(vec_lms[ch], EGD_LIN_2_NONLIN);
+    mat_eval_3x3(ptr_color_space->mat_lms2itp, vec_lms, itp_out);
+}
+
+void gm_itp_to_rgb(struct s_color_space* ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3], int en_pq_lut)
+{   /* ITP -> PQ non-linear LMS -> linear LMS -> RGB; rgb_out may be the same array as itp_inp (LMS is kept in a local) */
+    MATFLOAT vec_lms[3];
+    int ch;
+
+    mat_eval_3x3(ptr_color_space->mat_itp2lms, itp_inp, vec_lms);
+    /* en_pq_lut selects the table based PQ curve instead of the computed one */
+    for (ch = 0; ch < 3; ch++)
+        vec_lms[ch] = en_pq_lut ? gm_pq_lut(vec_lms[ch], EGD_NONLIN_2_LIN) : cs_gamma_pq(vec_lms[ch], EGD_NONLIN_2_LIN);
+    mat_eval_3x3(ptr_color_space->mat_lms2rgb, vec_lms, rgb_out);
+}
+
+int gm_is_valid_itp(struct s_color_space* ptr_color_space, MATFLOAT itp[3], int en_pq_lut)
+{   /* Result of cs_is_valid_rgb() for the RGB image of itp, checked against the luminance limits of the space */
+    MATFLOAT vec_rgb[3];
+
+    gm_itp_to_rgb(ptr_color_space, itp, vec_rgb, en_pq_lut);
+    return cs_is_valid_rgb(vec_rgb,
+        ptr_color_space->luminance_limits[0], ptr_color_space->luminance_limits[1]);
+}
+
+int gm_is_valid_ic(struct s_color_space* ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2], int en_pq_lut)
+{
+    /* Same check as gm_is_valid_itp() for a point given as {intensity, chroma}.
+     * hue_sin_cos holds {sin(hue), cos(hue)}: the second ITP component is
+     * chroma * cos(hue), the third one chroma * sin(hue) */
+    MATFLOAT vec_itp[3] = { pnt_ic[0], pnt_ic[1] * hue_sin_cos[1], pnt_ic[1] * hue_sin_cos[0] };
+
+    return gm_is_valid_itp(ptr_color_space, vec_itp, en_pq_lut);
+
+}
+
+void gm_gen_pq_lut(float* ptr_lut, int num_pnts, enum cs_gamma_dir gamma_dir)
+{   /* Fill ptr_lut[0 .. num_pnts-1] with cs_gamma_pq() samples for the requested direction */
+    MATFLOAT step, pos;
+    int idx;
+
+    if (gamma_dir != EGD_LIN_2_NONLIN) {    /* uniformly spaced inputs idx / (num_pnts - 1) */
+        for (idx = 0; idx < num_pnts; idx++)
+            ptr_lut[idx] = (float)cs_gamma_pq((MATFLOAT)idx / (MATFLOAT)(num_pnts - 1), gamma_dir);
+        return;
+    }
+
+    /* inputs start at 0.0 with a spacing of pow(2,-39); the spacing doubles after entries 128, 256, ... */
+    /* NOTE(review): entry 128*k is sampled at (2^k - 1) * 2^-32 while gm_pq_lut() reads it for 2^k * 2^-32 - confirm */
+    step = mat_pow(2.0, -32.0) / 128.0;
+    pos = 0.0;
+    for (idx = 0; idx < num_pnts; idx++) {
+        ptr_lut[idx] = (float)cs_gamma_pq(pos, gamma_dir);
+        if ((idx != 0) && ((idx % 128) == 0))
+            step *= 2.0;
+        pos += step;
+    }
+}
+
+MATFLOAT gm_pq_lut(MATFLOAT val, enum cs_gamma_dir gamma_dir)
+{   /* PQ curve by table lookup with linear interpolation between neighbour entries.
+     * The tables are addressed with |val|; the sign of val is restored on the result */
+    static const MATFLOAT gm_inc = 1.0 / (MATFLOAT)((long long)1 << 32);
+    const int ind_max = GM_PQTAB_NUMPNTS - 1;
+    MATFLOAT sign = (val < 0.0) ? -1.0 : 1.0;
+    MATFLOAT val_abs = MAT_ABS(val);
+    MATFLOAT val_out, vec_inp[2], phs, tmp;
+    int vec_ind[2];
+
+    /* both tables end at 1.0: hold larger inputs there, they would index past the last entry */
+    if (val_abs > 1.0)
+        val_abs = 1.0;
+
+    if (gamma_dir == EGD_LIN_2_NONLIN) {
+        int exp;
+
+        /* inputs below 2^-32 take the first table entry */
+        if (!(val_abs >= gm_inc))
+            return (MATFLOAT)gm_lin2pq[0] * sign;
+        /* 128 entries per power of two: the exponent selects the group, the mantissa the entry */
+        tmp = (mat_frexp(val_abs, &exp) - 0.5) * 256.0;
+        vec_ind[0] = (int)tmp;
+        phs = tmp - (MATFLOAT)vec_ind[0];
+        vec_ind[0] += (exp + 31) << 7;
+    }
+    else {
+        /* uniform grid over [0.0,1.0] */
+        tmp = val_abs * (MATFLOAT)ind_max;
+        vec_ind[0] = (int)tmp;
+        phs = tmp - (MATFLOAT)vec_ind[0];
+    }
+    vec_ind[1] = (vec_ind[0] < ind_max) ? vec_ind[0] + 1 : ind_max;
+
+    vec_inp[0] = (gamma_dir == EGD_LIN_2_NONLIN) ? gm_lin2pq[vec_ind[0]] : gm_pq2lin[vec_ind[0]];
+    vec_inp[1] = (gamma_dir == EGD_LIN_2_NONLIN) ? gm_lin2pq[vec_ind[1]] : gm_pq2lin[vec_ind[1]];
+    val_out = mat_linear(vec_inp, phs);
+
+    return val_out * sign;
+}
+
+int gm_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2], MATFLOAT s1_xy[2],
+    MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2])
+{   /* Intersect segment p0->p1 with segment p2->p3.
+     * s1_xy is the direction of the first segment (the callers in this file pass p1 - p0);
+     * p1_xy itself is not read.
+     * Returns 1 and writes the crossing point to p_xy, or 0 and leaves p_xy untouched */
+    const MATFLOAT dir2_x = p3_xy[0] - p2_xy[0];
+    const MATFLOAT dir2_y = p3_xy[1] - p2_xy[1];
+    const MATFLOAT det = -dir2_x * s1_xy[1] + s1_xy[0] * dir2_y;
+    MATFLOAT rel_x, rel_y, pos2, pos1;
+
+    /* zero determinant: the directions are parallel or one of them has no length */
+    if (det == 0.0)
+        return 0;
+
+    rel_x = p0_xy[0] - p2_xy[0];
+    rel_y = p0_xy[1] - p2_xy[1];
+    pos2 = (-s1_xy[1] * rel_x + s1_xy[0] * rel_y) / det;    /* relative position on p2->p3 */
+    pos1 = (dir2_x * rel_y - dir2_y * rel_x) / det;         /* relative position on p0->p1 */
+
+    /* the crossing must lie within both segments */
+    if ((pos2 < 0.0) || (pos2 > 1.0) || (pos1 < 0.0) || (pos1 > 1.0))
+        return 0;
+
+    p_xy[0] = p0_xy[0] + (pos1 * s1_xy[0]);
+    p_xy[1] = p0_xy[1] + (pos1 * s1_xy[1]);
+    return 1;
+}
\ No newline at end of file
diff --git a/src/amd/gmlib/gm/gm_funcs.h b/src/amd/gmlib/gm/gm_funcs.h
new file mode 100755
index 00000000000..52ca4db1de6
--- /dev/null
+++ b/src/amd/gmlib/gm/gm_funcs.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : gm_funcs.h
+ * Purpose    : Gamut Mapping functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : November 11, 2024
+ * Version    : 3.1
+ *----------------------------------------------------------------------
+ *
+ */
+
+#pragma once
+
+#include "mat_funcs.h"
+#include "cs_funcs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GM_NUM_PRIM         6           /* number of primary/secondary colors */
+#define GM_NUM_HUE          360         /* default number of hue slices in edge description grid */
+#define GM_NUM_EDGE         181         /* default number of edge points per hue in edge description grid */
+#define GM_NUM_INT          33          /* default number of intensity levels in HueRot grid */
+#define GM_STEP_SAMP        0.0001      /* default accuracy of edge detection procedures (for 14 bits signal) */
+#define GM_EDGE_ORG         0.5         /* default center point for edge description procedure */
+#define GM_ORG1_FACTOR      0.5         /* Origin1 default intensity */
+#define GM_ORG3_FACTOR      1.0         /* Origin3 default intensity */
+#define GM_ORG2_PERC        0.9         /* presumably the default of org2_perc_c (Origin2 percentage gap for chroma) */
+
+#define GM_CUSP_ADJUST      0x01        /* Adjust cusp points */
+#define GM_ZONE1_FLEX       0x02        /* Flexible zone 1 */
+#define GM_PQTAB_3DLUT      0x04
+#define GM_PQTAB_GBD        0x08
+#define GM_SCALE_LUMA       0x04        /* Luma scaling. NOTE(review): same value as GM_PQTAB_3DLUT - confirm they never share a mask */
+
+#define GM_UPDATE_SRC       0x01        /* update_msk bit, tested in gm_gen_map() */
+#define GM_UPDATE_DST       0x02        /* update_msk bit, tested in gm_gen_map() */
+
+#define GM_HUE_SHIFT        0x01
+#define GM_CHROMA_GAIN      0x02
+
+#define GM_PQTAB_NUMPNTS    4097        /* number of entries of the PQ tables addressed by gm_pq_lut() */
+
+enum gm_gamut_map_mode {
+    EGMM_NONE = 0,       /* NONE */
+    EGMM_TM = 1,         /* Tone Map (BT2390-4) */
+    EGMM_TM_CHTO = 2,    /* Tone Map + CHTO (Constant Hue Triple Origin) */
+    EGMM_TM_CHSO = 3,    /* Tone Map + CHSO (Constant Hue Single Origin) */
+    EGMM_TM_CHCI = 4     /* Tone Map + CHCI (Constant Hue Constant Intensity) */
+};
+
+enum gm_hue_rot_mode {   /* type of s_gamut_map.hue_rot_mode */
+    EHRM_NONE = 0,       /* NONE */
+    EHRM_HR = 1,         /* Hue rotation */
+    EHRM_CC = 2,         /* Chroma compression */
+    EHRM_HR_CC = 3       /* Hue rotation + Chroma compression */
+};
+
+enum gm_map_type {       /* type of s_gamut_map.map_type */
+    EMT_SEG = 0,         /* intensity segment (segments intersection) */
+    EMT_RAD = 1,         /* arc segment (radius sampling) */
+    EMT_SEGRAD = 2       /* hybrid (SEG + RAD) */
+};
+
+enum gm_edge_type {      /* type of s_gamut_map.edge_type */
+    EET_RAD = 0,         /* elevation angle uniform */
+    EET_CHROMA = 1       /* intensity uniform */
+};
+
+enum gm_show_pix_mode {  /* debugging modes of gm_show_pix(): pixels that are not "shown" are set to grey 0.5 */
+    ESPM_NONE = 0,       /* NONE */
+    ESPM_NOMAP = 1,      /* Show pixels inside gamut (zone 0) */
+    ESPM_MAP = 2,        /* Show pixels outside gamut (zone other than 0) */
+    ESPM_MAPZ1 = 3,      /* Show pixels outside gamut in zone1 */
+    ESPM_MAPZ2 = 4,      /* Show pixels outside gamut in zone2 */
+    ESPM_MAPZ3 = 5,      /* Show pixels outside gamut in zone3 */
+    ESPM_NUMZ = 6,       /* Show pixels zone number as grey level zone / 3 */
+    ESPM_HUEINP = 7,     /* Show input pixels with hue in range */
+    ESPM_HUEOUT = 8      /* Show output pixels with hue in range */
+};
+
+struct s_gamut_map {
+    /* input parameters (each comment describes the member declared above it) */
+    enum gm_gamut_map_mode    gamut_map_mode;
+    /* Gamut Map Mode: 0 - no gamut map, 1 - Tone Map BT2390-4, 2 - TM+CHTO, 3 - TM+CHSO, 4 - TM+CHCI */
+    enum gm_hue_rot_mode      hue_rot_mode;
+    /* Hue Rotation Mode: 0 - none, 1 - hue rotation, 2 - chroma compression, 3 - hue rotation and chroma compression */
+    int                       en_tm_scale_color;
+    /* Enable/Disable Color Scaling in Tone Mapping mode only: {0,1} = 1    */
+    unsigned int              mode;
+    /* Reserved for modifications of the Gamut Map algo */
+    struct s_color_space      color_space_src;
+    /* Source color space (primary RGBW chromaticity, gamma, and Luminance min/max) */
+    struct s_color_space      color_space_dst;
+    /* Destination color space (primary RGBW chromaticity, gamma and Luminance min/max) */
+    /* CHTO input tuning parameters */
+    MATFLOAT                  org2_perc_c;
+    /* Origin2 percentage gap for chroma [0.0,1.0] = 0.9 */
+    MATFLOAT                  vec_org1_factor[GM_NUM_PRIM];
+    /* Factor of Origin1 for M,R,Y,G,C,B [0.0,2.0] = 1.3, 1.3, 1.3, 1.3, 1.2, 1.0 */
+    MATFLOAT                  vec_org3_factor[GM_NUM_PRIM];
+    /* Factor of Origin3 for M,R,Y,G,C,B [1.0,1.5] = 1.05, 1.10, 1.10, 1.05, 1.01, 1.06 (as in gm_vec_org13_factor_def - TODO confirm) */
+    /* GM input tuning parameters */
+    int                       num_hue_pnts;
+    /* Number of hue grid points: [90,360]=360 */
+    int                       num_edge_pnts;
+    /* Number of edge IC grid points: [91, 181] = 181 */
+    int                       num_int_pnts;
+    /* Number of intensity grid points for primary hues: [5,33] = 33 */
+    enum gm_edge_type         edge_type;/* Edge type: {0,1} = 0 : 0 - radius based EET_RAD, 1 - chroma based EET_CHROMA */
+    enum gm_map_type          map_type;
+    /* Map type: {0,1,2} = 0 : 0 - segments intersection SEG, 1 - radius sampling RAD, 2 hybrid - SEG+RAD */
+    MATFLOAT                  step_samp;
+    /* Sampling precision in IC space for edge search [0.00001,0.001]=0.0001 */
+    int                       reserve;
+    /* Reserved for debugging purpose */
+    enum gm_show_pix_mode     show_pix_mode;
+    /* Show Pix Mode: [0,8]=0 : show pixel debugging mode */
+    MATFLOAT                  show_pix_hue_limits[2];    /* Show Pixel mode hue ranges */
+    /* calculated variables */
+    MATFLOAT                  lum_min;
+    /* minLum (BT2390-4) in PQ non-linear space */
+    MATFLOAT                  lum_max;
+    /* maxLum (BT2390-4) in PQ non-linear space */
+    MATFLOAT                  vec_prim_src_ich[3 * GM_NUM_PRIM];
+    /* ich for M,R,Y,G,C,B primaries of source gamut */
+    MATFLOAT                  vec_prim_dst_ich[3 * GM_NUM_PRIM];
+    /* ich for M,R,Y,G,C,B primaries of target gamut */
+    MATFLOAT                  *ptr_cusp_src_ic;
+    /* Intensity and chroma of Cusp num_hue_pnts points for source gamut */
+    MATFLOAT                  *ptr_cusp_dst_ic;
+    /* Intensity and chroma of Cusp num_hue_pnts points for target gamut */
+    MATFLOAT                  *ptr_org2_ic;
+    /* Intensity and chroma of Origin2 for num_hue_pnts points */
+    MATFLOAT                  *ptr_org3_ic;
+    /* Intensity and chroma of Origin3 for num_hue_pnts points */
+    MATFLOAT                  *ptr_hr_src_hc;
+    /* Source Primary Hue and Chroma for (GM_NUM_PRIM * num_int_pnts) points */
+    MATFLOAT                  *ptr_hr_dst_hc;
+    /* Target Primary Hue and Chroma for (GM_NUM_PRIM * num_int_pnts) points */
+    MATFLOAT                  *ptr_edge_ic;
+    /* Target gamut edge for (num_hue_pnts * num_edge_pnts) points */
+    void                      *(*ptr_func_alloc)(unsigned int, void*);
+    /* allocate memory function: (size in bytes, memory_context), see gm_gen_map() */
+    void                     (*ptr_func_free)(void*, void*);
+    /* deallocate memory function: (pointer, memory_context), see gm_gen_map() */
+    void*                    memory_context;
+    /* memory management context, passed as last argument to both functions above */
+    MATFLOAT                 hue_max;
+    MATFLOAT                 org1;
+    MATFLOAT                 org3;
+    /* internally calculated constants (hue_max, org1, org3) */
+};
+
+void gm_ctor(struct s_gamut_map *ptr_gamut_map,
+    void*(*ptr_func_alloc)(unsigned int, void*),
+    void(*ptr_func_free)(void*, void*),
+    void* mem_context); /* constructor; takes the memory allocate/free functions and their context */
+void gm_dtor(struct s_gamut_map *ptr_gamut_map);    /* destructor */
+void gm_alloc_mem(struct s_gamut_map *ptr_gamut_map);
+void gm_free_mem(struct s_gamut_map *ptr_gamut_map);
+
+/* initialization functions (update_msk: presumably GM_UPDATE_SRC / GM_UPDATE_DST bits - TODO confirm) */
+void gm_set_def(struct s_gamut_map *gamut_map);
+int gm_init_gamuts(struct s_gamut_map *ptr_gamut_map, struct s_cs_opts *ptr_cs_opts_src,
+        struct s_cs_opts *ptr_cs_opts_dst, unsigned int gm_mode, int update_msk);
+int gm_check_gamut(struct s_gamut_map *ptr_gamut_map);
+void gm_gencusp_ic(struct s_gamut_map *ptr_gamut_map, int color_space);    /* color_space : 0 - source, 1 - target */
+
+/* gamut boundary description (GBD) generation: one hue slice (hue_ind) per call */
+void gm_gen_edge_hue(struct s_gamut_map* ptr_gamut_map, int hue_ind);
+
+/* resampling functions */
+void gm_resample_hc(MATFLOAT vec_ich_inp[][3], MATFLOAT *ptr_hc_out,
+        int num_int_pnts_src, int num_int_pnts_dst);
+void gm_resample_hue_ic(MATFLOAT *ptr_hue, MATFLOAT *ptr_ic_inp,
+        MATFLOAT *ptr_ic_out, int num_hue_pnts_inp, int num_hue_pnts_out);
+void gm_genprim_hc(struct s_color_space *ptr_color_space, MATFLOAT *ptr_hr_hc,
+        int num_int_pnts, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max);
+
+/* Origin2 and Origin3 generation functions; gm_gen_map() passes a ptr_org13_factor buffer of num_hue_pnts * 2 values */
+void gm_genorg13_factor(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor);
+void gm_genorigin23_hue(struct s_gamut_map* ptr_gamut_map, MATFLOAT* ptr_org13_factor, int hue_ind);
+void gm_getorigin23(struct s_color_space* ptr_color_space_src, struct s_color_space* ptr_color_space_dst,
+    MATFLOAT hue, MATFLOAT org_13_factor[2], MATFLOAT org2_perc_c,MATFLOAT cusp_ic_src[2],
+    MATFLOAT cusp_ic_dst[2], MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int en_pq_lut);
+
+/* gamut map functions; gm_rgb_to_rgb() is the per pixel entry point used by gm_gen_3dlut() (linear RGB in and out) */
+int gm_rgb_to_rgb(struct s_gamut_map *ptr_gamut_map, MATFLOAT rgb_inp[3], MATFLOAT rgb_out[3]);
+MATFLOAT gm_tm_itp(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], MATFLOAT luma_limits[3],
+    MATFLOAT lum_min, MATFLOAT lum_max, int en_tm_scale_color, int en_tm_scale_luma); 
+MATFLOAT gm_tm_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max);
+MATFLOAT gm_scale_luma(MATFLOAT luma, MATFLOAT luma_limits[3], MATFLOAT lum_min, MATFLOAT lum_max);
+int gm_map_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]);
+int gm_map_chto_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]);    /* presumably EGMM_TM_CHTO */
+int gm_map_chso_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]);    /* presumably EGMM_TM_CHSO */
+int gm_map_chci_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3]);    /* presumably EGMM_TM_CHCI */
+
+/* hue rotation functions */
+void gm_hr_itp(struct s_gamut_map *gamut_map, MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int direction);
+void gm_hr_ich(struct s_gamut_map *ptr_gamut_map, MATFLOAT ich_inp[3], MATFLOAT ich_out[3], int direction);
+void gm_get_hr_parms(MATFLOAT ich[3], MATFLOAT luma_limits[3], MATFLOAT *ptr_hr_src_hc,
+        MATFLOAT *ptr_hr_dst_hc, int num_int_pnts, MATFLOAT rot_hs_cg[2]);
+
+/* mapping per map type - presumably EMT_SEG (seg), EMT_RAD (rad) and EMT_SEGRAD (segrad), TODO confirm */
+int gm_map_seg_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3],
+        MATFLOAT itp_out[3], int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], int vec_hue_ind[2], MATFLOAT hue_phs);
+int gm_map_rad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3],
+        MATFLOAT itp_out[3], int zone, MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT hue);
+int gm_map_segrad_itp(struct s_gamut_map *ptr_gamut_map, MATFLOAT itp_inp[3],
+        MATFLOAT itp_out[3], int zone, MATFLOAT origin2_ic[2],
+        MATFLOAT origin3_ic[2], MATFLOAT hue, int vec_hue_ind[2], MATFLOAT hue_phs);
+
+/* interpolate Ic between two hues: vec_hue_ind holds the two hue indexes, hue_phs presumably the weight between them */
+MATFLOAT gm_hue_to_index_phase(MATFLOAT hue, MATFLOAT hue_max, int num_hue_pnts, int vec_hue_ind[2]);
+void gm_interp_ic(int vec_hue_ind[2], MATFLOAT hue_phs,
+        MATFLOAT vec_pnt_ic[], MATFLOAT pnt_ic[2]);
+void gm_getseg_ic(int vec_hue_ind[2], MATFLOAT hue_phs,
+        int ind, int num_edge_pnts, MATFLOAT *ptr_edge_ic, MATFLOAT pnt_ic[2]);
+
+/* Edge generation functions; hue_sin_cos holds {sin(hue), cos(hue)} as filled by the gm_map_zone*_rad() callers */
+void gm_genedge(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3],
+        int num_edge_pnts, enum gm_edge_type edge_type, MATFLOAT step_samp, MATFLOAT hue,
+    MATFLOAT *ptr_edge_ic, int en_pq_lut);
+void gm_genedge_int(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3],
+        int num_edge_pnts, MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic,
+        int en_pq_lut);
+void gm_genedge_rad(struct s_color_space *ptr_color_space, MATFLOAT luma_limits[3],
+        int num_edge_pnts, MATFLOAT hue, MATFLOAT step_samp, MATFLOAT *ptr_edge_ic,
+    int en_pq_lut);
+void gm_sample_edge_ic(struct s_color_space *ptr_color_space,
+        MATFLOAT hue_sin_cos[2], MATFLOAT inc_ic[2], MATFLOAT pnt_ic[2],
+    int en_pq_lut);
+void gm_edgecusp_adjust(MATFLOAT *ptr_edge_ic, int num_edge_pnts, MATFLOAT cusp_ic[2]);
+
+/* Gamut Map related functions; *_seg walk the edge points from pnt_fst to pnt_lst, *_rad sample (en_pq_lut: use PQ table) */
+int gm_get_zone(MATFLOAT itp[3], MATFLOAT origin2_ic[2], MATFLOAT origin3_ic[2], MATFLOAT luma_limits[3]);
+int gm_map_zone1_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2],
+        MATFLOAT hue_phs, MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst);
+int gm_map_zone2_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2],
+        MATFLOAT hue_phs, MATFLOAT origin2_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst);
+int gm_map_zone3_seg(MATFLOAT itp_inp[3], MATFLOAT itp_out[3], int vec_hue_ind[2],
+        MATFLOAT hue_phs, MATFLOAT origin3_ic[2], int num_edge_pnts, MATFLOAT *ptr_edge_ic, int pnt_fst, int pnt_lst);
+void gm_map_zone1_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3],
+        MATFLOAT itp_out[3], MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut);
+void gm_map_zone2_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3],
+        MATFLOAT itp_out[3], MATFLOAT step_samp, MATFLOAT origin2_ic[2], MATFLOAT hue, int en_pq_lut);
+void gm_map_zone3_rad(struct s_color_space *ptr_color_space, MATFLOAT itp_inp[3],
+        MATFLOAT itp_out[3], MATFLOAT step_samp, MATFLOAT origin3_ic[2], MATFLOAT hue, int en_pq_lut);
+
+/* Show Pixel debugging functions */
+void gm_show_pix(int zone, MATFLOAT itp_src[3], MATFLOAT itp_dst[3],
+    MATFLOAT rgb[3], enum gm_show_pix_mode show_pix_mode, MATFLOAT hue_limits[2]);
+
+void gm_rgb_to_itp(struct s_color_space* ptr_color_space, MATFLOAT rgb_inp[3], MATFLOAT itp_out[3], int en_pq_lut);    /* output may be the same as input */
+void gm_itp_to_rgb(struct s_color_space* ptr_color_space, MATFLOAT itp_inp[3], MATFLOAT rgb_out[3], int en_pq_lut);    /* output may be the same as input */
+
+int gm_is_valid_itp(struct s_color_space* ptr_color_space, MATFLOAT itp[3], int en_pq_lut);
+int gm_is_valid_ic(struct s_color_space* ptr_color_space, MATFLOAT pnt_ic[2], MATFLOAT hue_sin_cos[2], int en_pq_lut);
+
+void gm_gen_pq_lut(float* ptr_lut, int num_pnts, enum cs_gamma_dir gamma_dir);
+MATFLOAT gm_pq_lut(MATFLOAT val, enum cs_gamma_dir gamma_dir);    /* table based PQ curve, sign of val is preserved */
+int gm_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2], MATFLOAT s1_xy[2],
+    MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2]);    /* 1 - crossing found and written to p_xy, 0 - none */
+
+
+/* MULTI-THREADING */
+/* for a multi-threading implementation the following functions must be overridden */
+void gm_gen_map(struct s_gamut_map* ptr_gamut_map, int update_msk);
+void gm_gen_3dlut(struct s_gamut_map* ptr_gamut_map, int num_pnts,
+    int bitwidth, int en_merge, unsigned short* ptr_3dlut_rgb);
+/* end MULTI-THREADING */
+
+/* global constants: per primary {Origin1 factor, Origin3 factor}, presumably the defaults of vec_org1/3_factor */
+static const MATFLOAT gm_vec_org13_factor_def[GM_NUM_PRIM][2] = {
+    {1.3, 1.05},    /* M */
+    {1.3, 1.10},    /* R */
+    {1.3, 1.10},    /* Y */
+    {1.3, 1.05},    /* G */
+    {1.2, 1.01},    /* C */
+    {1.0, 1.06}     /* B */
+};
+
+static const MATFLOAT gm_vec_cusp_rgb[GM_NUM_PRIM][3] = {    /* {R, G, B} of each primary/secondary color */
+    {1.0, 0.0, 1.0},    /* M */
+    {1.0, 0.0, 0.0},    /* R */
+    {1.0, 1.0, 0.0},    /* Y */
+    {0.0, 1.0, 0.0},    /* G */
+    {0.0, 1.0, 1.0},    /* C */
+    {0.0, 0.0, 1.0}     /* B */
+};
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/gm/mat_funcs.c b/src/amd/gmlib/gm/mat_funcs.c
new file mode 100755
index 00000000000..dd1d0042c89
--- /dev/null
+++ b/src/amd/gmlib/gm/mat_funcs.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : mat_funcs.c
+ * Purpose    : Mathematical functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : September 20, 2023
+ * Version    : 1.2
+ *----------------------------------------------------------------------
+ */
+
+#ifndef GM_SIM
+#pragma code_seg("PAGED3PC")
+#pragma data_seg("PAGED3PD")
+#pragma const_seg("PAGED3PR")
+#endif
+
+#include "mat_funcs.h"
+#include <math.h>
+
+float mat_fast_log(float x);
+
+/*
+ * Multiply a 3x3 matrix by a 3-element column vector: vec_out = mat * vec_inp.
+ * vec_out is cleared and then accumulated into, so it must not alias vec_inp.
+ */
+void mat_eval_3x3(MATFLOAT mat[3][3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3])
+{
+    int row;
+
+    mat_3x1_zero(vec_out);
+    for (row = 0; row < 3; row++) {
+        int col = 0;
+
+        while (col < 3) {
+            vec_out[row] += mat[row][col] * vec_inp[col];
+            col++;
+        }
+    }
+}
+
+/* Affine transform with an output offset: vec_out = mat * vec_inp + vec_off. */
+void mat_eval_3x3_off(MATFLOAT mat[3][3], MATFLOAT vec_off[3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3])
+{
+    int idx = 0;
+
+    mat_eval_3x3(mat, vec_inp, vec_out);
+    while (idx < 3) {
+        vec_out[idx] += vec_off[idx];
+        idx++;
+    }
+}
+
+/*
+ * Affine transform with offsets on both sides:
+ * vec_out = mat * (vec_inp + vec_off_inp) + vec_off_out.
+ */
+void mat_eval_off_3x3_off(MATFLOAT vec_off_inp[3], MATFLOAT mat[3][3],
+    MATFLOAT vec_off_out[3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3])
+{
+    MATFLOAT shifted[3];
+    int k;
+
+    /* the input offset is applied to a scratch copy so vec_inp stays untouched */
+    for (k = 0; k < 3; k++) {
+        shifted[k] = vec_inp[k] + vec_off_inp[k];
+    }
+
+    mat_eval_3x3(mat, shifted, vec_out);
+
+    for (k = 0; k < 3; k++) {
+        vec_out[k] += vec_off_out[k];
+    }
+}
+
+/*
+ * 3x3 matrix product: mat2x1 = mat2 * mat1.
+ * The destination is cleared before accumulation, so it must not alias
+ * either operand.
+ */
+void mat_mul3x3(MATFLOAT mat2[3][3], MATFLOAT mat1[3][3], MATFLOAT mat2x1[3][3])
+{
+    int row, col, inner;
+
+    mat_3x3_zero(mat2x1);
+    for (row = 0; row < 3; row++) {
+        for (col = 0; col < 3; col++) {
+            for (inner = 0; inner < 3; inner++) {
+                mat2x1[row][col] += mat2[row][inner] * mat1[inner][col];
+            }
+        }
+    }
+}
+
+/*
+ * Invert a 3x3 matrix through its adjugate: mat_out = adj(mat_inp) / det(mat_inp).
+ *
+ * Returns 1 on success. Returns 0 and leaves mat_out untouched when the
+ * determinant is exactly zero, or when it is negligible (ratio below
+ * PRECISION_LIMIT) compared with the magnitude of the terms it was summed
+ * from, i.e. the matrix is singular within floating-point precision.
+ */
+int mat_inv3x3(MATFLOAT mat_inp[3][3], MATFLOAT mat_out[3][3])
+{
+    /* the six signed products of the determinant expansion */
+    const MATFLOAT terms[6] = {
+        mat_inp[0][0] * mat_inp[1][1] * mat_inp[2][2],
+        mat_inp[0][1] * mat_inp[1][2] * mat_inp[2][0],
+        mat_inp[0][2] * mat_inp[1][0] * mat_inp[2][1],
+        -mat_inp[0][2] * mat_inp[1][1] * mat_inp[2][0],
+        -mat_inp[0][1] * mat_inp[1][0] * mat_inp[2][2],
+        -mat_inp[0][0] * mat_inp[1][2] * mat_inp[2][1]
+    };
+    MATFLOAT sum_pos = 0.0;
+    MATFLOAT sum_neg = 0.0;
+    MATFLOAT det;
+    int k;
+
+    /*
+     * Positive and negative contributions are accumulated separately so that
+     * (sum_pos - sum_neg) gives the total magnitude that went into det.
+     */
+    for (k = 0; k < 6; k++) {
+        if (terms[k] >= 0.0)
+            sum_pos += terms[k];
+        else
+            sum_neg += terms[k];
+    }
+    det = sum_pos + sum_neg;
+
+    /* Is the matrix singular? */
+    if ((det == 0.0) || (MAT_ABS(det / (sum_pos - sum_neg)) < PRECISION_LIMIT))
+        return 0; /* matrix has no inverse */
+
+    /* inverse(A) = adj(A) / det(A), written one output column at a time */
+    mat_out[0][0] =  (mat_inp[1][1] * mat_inp[2][2] - mat_inp[1][2] * mat_inp[2][1]) / det;
+    mat_out[1][0] = -(mat_inp[1][0] * mat_inp[2][2] - mat_inp[1][2] * mat_inp[2][0]) / det;
+    mat_out[2][0] =  (mat_inp[1][0] * mat_inp[2][1] - mat_inp[1][1] * mat_inp[2][0]) / det;
+
+    mat_out[0][1] = -(mat_inp[0][1] * mat_inp[2][2] - mat_inp[0][2] * mat_inp[2][1]) / det;
+    mat_out[1][1] =  (mat_inp[0][0] * mat_inp[2][2] - mat_inp[0][2] * mat_inp[2][0]) / det;
+    mat_out[2][1] = -(mat_inp[0][0] * mat_inp[2][1] - mat_inp[0][1] * mat_inp[2][0]) / det;
+
+    mat_out[0][2] =  (mat_inp[0][1] * mat_inp[1][2] - mat_inp[0][2] * mat_inp[1][1]) / det;
+    mat_out[1][2] = -(mat_inp[0][0] * mat_inp[1][2] - mat_inp[0][2] * mat_inp[1][0]) / det;
+    mat_out[2][2] =  (mat_inp[0][0] * mat_inp[1][1] - mat_inp[0][1] * mat_inp[1][0]) / det;
+
+    return 1;
+}
+
+/* Set all three elements of vec_out to zero. */
+void mat_3x1_zero(MATFLOAT vec_out[3])
+{
+    int idx = 0;
+
+    while (idx < 3) {
+        vec_out[idx] = 0.0;
+        idx++;
+    }
+}
+
+/* Set all nine elements of mat_out to zero. */
+void mat_3x3_zero(MATFLOAT mat_out[3][3])
+{
+    int row, col;
+
+    for (row = 0; row < 3; row++) {
+        for (col = 0; col < 3; col++) {
+            mat_out[row][col] = 0.0;
+        }
+    }
+}
+
+/* Fill mat_out with the 3x3 identity matrix. */
+void mat_3x3_unity(MATFLOAT mat_out[3][3])
+{
+    int row, col;
+
+    for (row = 0; row < 3; row++) {
+        for (col = 0; col < 3; col++) {
+            if (row == col)
+                mat_out[row][col] = 1.0f;
+            else
+                mat_out[row][col] = 0.0f;
+        }
+    }
+}
+
+/* Copy the 3x3 matrix mat_inp element by element into mat_out. */
+void mat_copy3x3(MATFLOAT mat_inp[3][3], MATFLOAT mat_out[3][3])
+{
+    int row, col;
+
+    for (row = 0; row < 3; row++) {
+        for (col = 0; col < 3; col++) {
+            mat_out[row][col] = mat_inp[row][col];
+        }
+    }
+}
+
+/*
+ * Round to the nearest integer, with halves rounded away from zero:
+ * the magnitude is rounded and the sign of val is re-applied afterwards.
+ */
+int mat_round(MATFLOAT val)
+{
+    int magnitude = (int)(MAT_ABS(val) + 0.5);
+
+    if (val < 0)
+        return -magnitude;
+    if (val > 0)
+        return magnitude;
+
+    return 0;
+}
+
+/* Normalize an integer code: returns val / val_max as a floating-point value. */
+MATFLOAT mat_int2flt(int val, int val_max)
+{
+    const MATFLOAT numer = (MATFLOAT)val;
+    const MATFLOAT denom = (MATFLOAT)val_max;
+
+    return numer / denom;
+}
+
+/*
+ * Scale a normalized value by val_max, round it to the nearest integer and
+ * limit the result to the range [0, val_max].
+ */
+int mat_flt2int(MATFLOAT val_inp, int val_max)
+{
+    int code = mat_round(val_inp * (MATFLOAT)val_max);
+
+    if (code < 0)
+        return 0;
+    if (code > val_max)
+        return val_max;
+
+    return code;
+}
+
+/*
+ * Build a matrix + single-offset transform from a matrix and an input/output
+ * offset pair. Any of mat_inp, vec_off_inp and vec_off_out may be NULL:
+ *   mat_res     = mat_inp, or the identity when mat_inp is NULL
+ *   vec_off_res = vec_off_out - mat_res * vec_off_inp
+ * where a NULL offset counts as a zero vector.
+ */
+void mat_gen_mat_off(MATFLOAT mat_inp[3][3], MATFLOAT vec_off_inp[3],
+    MATFLOAT vec_off_out[3], MATFLOAT mat_res[3][3], MATFLOAT vec_off_res[3])
+{
+    int row;
+
+    /* start from the output offset (zero when absent) */
+    if (!vec_off_out) {
+        mat_3x1_zero(vec_off_res);
+    } else {
+        for (row = 0; row < 3; row++)
+            vec_off_res[row] = vec_off_out[row];
+    }
+
+    if (!mat_inp)
+        mat_3x3_unity(mat_res);
+    else
+        mat_copy3x3(mat_inp, mat_res);
+
+    /* nothing to fold in when there is no input offset */
+    if (!vec_off_inp)
+        return;
+
+    /* the input offset is pushed through the matrix and merged into the output offset */
+    for (row = 0; row < 3; row++) {
+        vec_off_res[row] -= (mat_res[row][0] * vec_off_inp[0] + mat_res[row][1] *
+                vec_off_inp[1] + mat_res[row][2] * vec_off_inp[2]);
+    }
+}
+
+/*
+ * Scale a normalized offset vector by 2^bitwidth.
+ * vec_off_out may be the same array as vec_off_inp.
+ */
+void mat_scl_off(MATFLOAT vec_off_inp[3], MATFLOAT vec_off_out[3], int bitwidth)
+{
+    const MATFLOAT scale = (MATFLOAT)(1 << bitwidth);
+    int idx;
+
+    for (idx = 0; idx < 3; idx++) {
+        vec_off_out[idx] = vec_off_inp[idx] * scale;
+    }
+}
+
+/*
+ * Convert an integer colour triplet: vec_out = round(mat * vec_inp + vec_off).
+ *
+ * vec_inp   input triplet (integer codes)
+ * vec_out   output triplet
+ * bitwidth  width of the output codes; only used when is_clip is non-zero
+ * mat       3x3 conversion matrix
+ * vec_off   offset added to each output channel before rounding
+ * is_clip   when non-zero, each output is limited to [0, 2^bitwidth - 1]
+ */
+void mat_cvt_cs(int vec_inp[3], int vec_out[3], int bitwidth,
+    MATFLOAT mat[3][3], MATFLOAT vec_off[3], int is_clip)
+{
+    int nc, ni;
+
+    for (nc = 0; nc < 3; nc++) {
+        MATFLOAT sum = vec_off[nc];
+        int value;
+
+        for (ni = 0; ni < 3; ni++)
+            sum += mat[nc][ni] * (MATFLOAT)vec_inp[ni];
+        value = mat_round(sum);
+        if (is_clip) {
+            const int max_value = (1 << bitwidth) - 1;
+
+            /* MAT_CLAMP is an expression: its result has to be stored back */
+            value = MAT_CLAMP(value, 0, max_value);
+        }
+        vec_out[nc] = value;
+    }
+}
+
+/*
+ * Wrap an angle in radians toward [0, 2*pi) by adding or subtracting a single
+ * turn. Inputs more than one turn outside that range are not fully normalized.
+ */
+MATFLOAT mat_norm_angle(MATFLOAT angle)
+{
+    const MATFLOAT full_turn = 2.0f * mat_get_pi();
+
+    if (angle < 0.0f)
+        return angle + full_turn;
+    if (angle >= full_turn)
+        return angle - full_turn;
+
+    return angle;
+}
+
+/* Limit val_inp to the closed range [val_min, val_max]. */
+MATFLOAT mat_clamp(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_max)
+{
+    if (val_inp < val_min)
+        return val_min;
+    if (val_inp > val_max)
+        return val_max;
+
+    return val_inp;
+}
+
+/* Return 1 when val_inp is not NaN and lies within [val_min, val_max], else 0. */
+int mat_is_valid(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_max)
+{
+    if (mat_is_number(val_inp) == 0)
+        return 0;
+    if (val_inp < val_min)
+        return 0;
+    if (val_inp > val_max)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Return 1 when every one of the first 'size' elements of vec_inp passes
+ * mat_is_valid() for the range [val_min, val_max]; return 0 at the first
+ * element that does not.
+ */
+int mat_is_valid_vec(MATFLOAT vec_inp[], int size, MATFLOAT val_min, MATFLOAT val_max)
+{
+    int idx = 0;
+
+    while (idx < size) {
+        if (mat_is_valid(vec_inp[idx], val_min, val_max) == 0)
+            return 0;
+        idx++;
+    }
+
+    return 1;
+}
+
+/* Return 1 unless val is NaN (NaN is the only value that differs from itself). */
+int mat_is_number(MATFLOAT val)
+{
+    if (val != val)
+        return 0;
+
+    return 1;
+}
+
+/* Map val_inp from the range [val_min, val_min + val_rng] to [0.0, 1.0]. */
+MATFLOAT mat_norm(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_rng)
+{
+    const MATFLOAT above_min = val_inp - val_min;
+
+    return above_min / val_rng;
+}
+
+/* Inverse of mat_norm(): map val_inp from [0.0, 1.0] to [val_min, val_min + val_rng]. */
+MATFLOAT mat_denorm(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_rng)
+{
+    return val_min + val_inp * val_rng;
+}
+
+/* Copy the first 'size' elements of vec_inp into vec_out, in ascending order. */
+void mat_copy(MATFLOAT vec_inp[], MATFLOAT vec_out[], int size)
+{
+    int idx = 0;
+
+    while (idx < size) {
+        vec_out[idx] = vec_inp[idx];
+        idx++;
+    }
+}
+
+/* Fill the first 'size' elements of vec_out with val_inp. */
+void mat_set(MATFLOAT val_inp, MATFLOAT vec_out[], int size)
+{
+    int idx = 0;
+
+    while (idx < size) {
+        vec_out[idx] = val_inp;
+        idx++;
+    }
+}
+
+/*
+ * Index of the grid node at or below val_inp on a uniform grid of num_pnts
+ * nodes spanning [0, val_max] (the quotient is truncated toward zero).
+ */
+int mat_flt_to_index(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts)
+{
+    const MATFLOAT node_spacing = val_max / (MATFLOAT)(num_pnts - 1);
+    const MATFLOAT position = val_inp / node_spacing;
+
+    return (int)position;
+}
+
+/* Value of grid node 'index' on a uniform grid of num_pnts nodes spanning [0, val_max]. */
+MATFLOAT mat_index_to_flt(int index, MATFLOAT val_max, int num_pnts)
+{
+    const MATFLOAT node_spacing = val_max / (MATFLOAT)(num_pnts - 1);
+
+    return (MATFLOAT)index * node_spacing;
+}
+
+/*
+ * Locate val_inp on a uniform grid of num_pnts nodes spanning [0, val_max].
+ *
+ * vec_ind[0] receives the node at or below val_inp, vec_ind[1] the next node
+ * (limited to the last node). The return value is the fractional position of
+ * val_inp past vec_ind[0], in units of one grid step.
+ */
+MATFLOAT mat_flt_to_index_phase(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts, int vec_ind[2])
+{
+    const int last = num_pnts - 1;
+    const MATFLOAT node_spacing = val_max / (MATFLOAT)last;
+    const MATFLOAT position = val_inp / node_spacing;
+    const int lower = (int)position;
+    int upper = lower + 1;
+
+    if (upper > last)
+        upper = last;
+
+    vec_ind[0] = lower;
+    vec_ind[1] = upper;
+
+    return position - (MATFLOAT)lower;
+}
+
+/*
+ * Locate val_inp in the table vec_val[0..num_pnts-1].
+ * NOTE(review): the backward search presumes vec_val is sorted in ascending
+ * order - confirm against the callers.
+ *
+ * vec_ind[0] receives the highest index whose entry is <= val_inp and
+ * vec_ind[1] the following index (limited to the last entry). The return
+ * value is the interpolation phase of val_inp between those two entries,
+ * or 0.0 when both entries are equal.
+ *
+ * Inputs below vec_val[0] (and NaN) are pinned to the first entry with phase
+ * 0.0, mirroring inputs at or above the last entry, which already yield the
+ * last entry with phase 0.0. An empty table (num_pnts <= 0) yields indexes
+ * 0/0 and phase 0.0 without touching vec_val.
+ */
+MATFLOAT mat_vec_to_index_phase(MATFLOAT val_inp, MATFLOAT vec_val[], int num_pnts, int vec_ind[2])
+{
+    int ind0, ind1;
+
+    if (num_pnts <= 0) {
+        vec_ind[0] = 0;
+        vec_ind[1] = 0;
+        return 0.0;
+    }
+
+    /* search downwards, but never step below the first entry */
+    for (ind0 = num_pnts - 1; ind0 > 0; ind0--) {
+        if (val_inp >= vec_val[ind0])
+            break;
+    }
+    ind1 = MAT_MIN(ind0 + 1, num_pnts - 1);
+
+    vec_ind[0] = ind0;
+    vec_ind[1] = ind1;
+
+    /* only reachable with ind0 == 0: val_inp is below the table, or is NaN */
+    if (!(val_inp >= vec_val[ind0]))
+        return 0.0;
+
+    return (vec_val[ind0] == vec_val[ind1]) ? 0.0 : (val_inp - vec_val[ind0]) / (vec_val[ind1] - vec_val[ind0]);
+}
+
+/*
+ * Map an integer code in [0, val_max] to the index of the grid node at or
+ * below it on a grid of num_indexes nodes (integer division).
+ */
+int mat_int_to_index(int val_inp, int val_max, int num_indexes)
+{
+    const int last_index = num_indexes - 1;
+    const int scaled = val_inp * last_index;
+
+    return scaled / val_max;
+}
+
+/*
+ * Integer code of grid node 'index' on a grid of num_indexes nodes spanning
+ * [0, val_max] (integer division).
+ */
+int mat_index_to_int(int index, int val_max, int num_indexes)
+{
+    const int last_index = num_indexes - 1;
+    const int scaled = index * val_max;
+
+    return scaled / last_index;
+}
+
+/*
+ * Locate an integer code on a grid of num_indexes nodes spanning [0, val_max].
+ *
+ * vec_val_ind[0] receives the node at or below val_inp, vec_val_ind[1] the
+ * next node (limited to the last node). The return value is the distance of
+ * val_inp from the integer code of node vec_val_ind[0], in units of one
+ * (fractional) grid step.
+ */
+MATFLOAT mat_int_to_index_phase(int val_inp, int val_max, int num_indexes, int vec_val_ind[2])
+{
+    const int last_index = num_indexes - 1;
+    const MATFLOAT node_spacing = (MATFLOAT)val_max / (MATFLOAT)last_index;
+    const int lower = mat_int_to_index(val_inp, val_max, num_indexes);
+
+    vec_val_ind[0] = lower;
+    vec_val_ind[1] = MAT_MIN(lower + 1, last_index);
+
+    return (val_inp - mat_index_to_int(lower, val_max, num_indexes)) / node_spacing;
+}
+
+/*
+ * Find the point where the hue table crosses 2*PI: scanning from the end,
+ * return the first index whose hue is lower than that of its predecessor.
+ * Returns 0 when no such drop exists.
+ */
+int mat_get_hue_index_2pi(MATFLOAT vec_hue[], int num_hue_pnts)
+{
+    int idx = num_hue_pnts - 1;
+
+    while ((idx >= 1) && !(vec_hue[idx] < vec_hue[idx - 1]))
+        idx--;
+
+    return idx;
+}
+
+/*
+ * Locate val_inp in a circular table of num_hue_pnts values.
+ *
+ * The search starts at index_max and walks forward, wrapping at the end of
+ * the table, until an entry greater than val_inp is found or the walk comes
+ * back to index_max. vec_ind_out[1] receives that entry and vec_ind_out[0]
+ * its circular predecessor. The return value is the position of val_inp
+ * between the two entries; negative differences are wrapped by val_max.
+ * NOTE(review): index_max is presumably the index returned by
+ * mat_get_hue_index_2pi() and val_max one full turn - confirm at the callers.
+ */
+MATFLOAT mat_hue_to_index_phase(MATFLOAT val_inp, int num_hue_pnts,
+    MATFLOAT vec_val[], MATFLOAT val_max, int index_max, int vec_ind_out[2])
+{
+    int lower, upper;
+    MATFLOAT span, offset;
+
+    /* find the first entry above val_inp, at most one lap around the table */
+    upper = index_max;
+    for (;;) {
+        if (!(val_inp >= vec_val[upper]))
+            break;
+        upper = (upper + 1) % num_hue_pnts;
+        if (upper == index_max)
+            break;
+    }
+
+    if (upper > 0)
+        lower = upper - 1;
+    else
+        lower = num_hue_pnts - 1;
+
+    /* width of the bracketing interval, unwrapped */
+    span = vec_val[upper] - vec_val[lower];
+    if (span < 0.0)
+        span += val_max;
+
+    /* distance of the input from the lower entry, unwrapped */
+    offset = val_inp - vec_val[lower];
+    if (offset < 0.0)
+        offset += val_max;
+
+    vec_ind_out[0] = lower;
+    vec_ind_out[1] = upper;
+
+    return offset / span;
+}
+
+/*
+ * Intersect the 2D segments p0-p1 and p2-p3.
+ *
+ * Returns 1 and stores the intersection point in p_xy when the segments
+ * cross. Returns 0 and leaves p_xy untouched when the cross product of the
+ * two directions is exactly zero or when the crossing lies outside either
+ * segment.
+ */
+int mat_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2],
+    MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2])
+{
+    /* direction vectors of the two segments */
+    const MATFLOAT dir1_x = p1_xy[0] - p0_xy[0];
+    const MATFLOAT dir1_y = p1_xy[1] - p0_xy[1];
+    const MATFLOAT dir2_x = p3_xy[0] - p2_xy[0];
+    const MATFLOAT dir2_y = p3_xy[1] - p2_xy[1];
+    const MATFLOAT cross = -dir2_x * dir1_y + dir1_x * dir2_y;
+    MATFLOAT gap_x, gap_y, along2, along1;
+
+    if (cross == 0.0)
+        return 0; /* no collision */
+
+    /* vector from the start of the second segment to the start of the first */
+    gap_x = p0_xy[0] - p2_xy[0];
+    gap_y = p0_xy[1] - p2_xy[1];
+
+    /* parameter of the crossing along p2-p3 */
+    along2 = (-dir1_y * gap_x + dir1_x * gap_y) / cross;
+    if ((along2 < 0.0) || (along2 > 1.0))
+        return 0; /* no collision */
+
+    /* parameter of the crossing along p0-p1 */
+    along1 = (dir2_x * gap_y - dir2_y * gap_x) / cross;
+    if ((along1 < 0.0) || (along1 > 1.0))
+        return 0; /* no collision */
+
+    /* collision detected */
+    p_xy[0] = p0_xy[0] + (along1 * dir1_x);
+    p_xy[1] = p0_xy[1] + (along1 * dir1_y);
+
+    return 1;
+}
+
+/* Linear interpolation: vec_inp[0] at phs = 0, vec_inp[1] at phs = 1. */
+MATFLOAT mat_linear(MATFLOAT vec_inp[2], MATFLOAT phs)
+{
+    const MATFLOAT start = vec_inp[0];
+
+    return start + (vec_inp[1] - start) * phs;
+}
+
+/*
+ * Bilinear interpolation over a 2x2 cell: vec_phs[0] interpolates along the
+ * last index of vec_inp, vec_phs[1] along the first.
+ */
+MATFLOAT mat_bilinear(MATFLOAT vec_inp[2][2], MATFLOAT vec_phs[2])
+{
+    MATFLOAT edge[2];
+
+    edge[0] = mat_linear(vec_inp[0], vec_phs[0]);
+    edge[1] = mat_linear(vec_inp[1], vec_phs[0]);
+
+    return mat_linear(edge, vec_phs[1]);
+}
+
+/*
+ * Trilinear interpolation over a 2x2x2 cell: vec_phs[0] and vec_phs[1]
+ * interpolate inside each 2x2 face (see mat_bilinear), vec_phs[2] blends the
+ * two faces along the first index of vec_inp.
+ */
+MATFLOAT mat_trilinear(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3])
+{
+    MATFLOAT face[2];
+
+    face[0] = mat_bilinear(vec_inp[0], vec_phs);
+    face[1] = mat_bilinear(vec_inp[1], vec_phs);
+
+    return mat_linear(face, vec_phs[2]);
+}
+
+/*
+ * Tetrahedral interpolation inside a 2x2x2 cell.
+ *
+ * vec_inp holds the eight corner values, indexed [x][y][z]; the phases are
+ * read as fx = vec_phs[2], fy = vec_phs[1], fz = vec_phs[0]. The ordering of
+ * the three phases selects one of six tetrahedra (T0..T5); for the selected
+ * one, vec_c[] holds the corner differences that are weighted by fx, fy and
+ * fz respectively and added to the corner value vec_inp[0][0][0].
+ *
+ * The result is clamped to [0.0, 1.0] before it is returned.
+ */
+MATFLOAT mat_tetra(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3])
+{
+    MATFLOAT fx = vec_phs[2];
+    MATFLOAT fy = vec_phs[1];
+    MATFLOAT fz = vec_phs[0];
+    MATFLOAT vec_c[3];
+    MATFLOAT value;
+    int nc;
+
+    if (fx > fy) {
+        if (fy > fz) { /* T0: x > y > z */
+            vec_c[0] = vec_inp[1][0][0] - vec_inp[0][0][0];
+            vec_c[1] = vec_inp[1][1][0] - vec_inp[1][0][0];
+            vec_c[2] = vec_inp[1][1][1] - vec_inp[1][1][0];
+        } else if (fx > fz) { /* T5: x > z > y */
+            vec_c[0] = vec_inp[1][0][0] - vec_inp[0][0][0];
+            vec_c[1] = vec_inp[1][1][1] - vec_inp[1][0][1];
+            vec_c[2] = vec_inp[1][0][1] - vec_inp[1][0][0];
+        } else { /* T4: z > x > y */
+            vec_c[0] = vec_inp[1][0][1] - vec_inp[0][0][1];
+            vec_c[1] = vec_inp[1][1][1] - vec_inp[1][0][1];
+            vec_c[2] = vec_inp[0][0][1] - vec_inp[0][0][0];
+        }
+    } else {
+        if (fx > fz) { /* T1: y > x > z */
+            vec_c[0] = vec_inp[1][1][0] - vec_inp[0][1][0];
+            vec_c[1] = vec_inp[0][1][0] - vec_inp[0][0][0];
+            vec_c[2] = vec_inp[1][1][1] - vec_inp[1][1][0];
+        } else if (fy > fz) { /* T2: y > z > x */
+            vec_c[0] = vec_inp[1][1][1] - vec_inp[0][1][1];
+            vec_c[1] = vec_inp[0][1][0] - vec_inp[0][0][0];
+            vec_c[2] = vec_inp[0][1][1] - vec_inp[0][1][0];
+        } else { /* T3: z > y > x */
+            vec_c[0] = vec_inp[1][1][1] - vec_inp[0][1][1];
+            vec_c[1] = vec_inp[0][1][1] - vec_inp[0][0][1];
+            vec_c[2] = vec_inp[0][0][1] - vec_inp[0][0][0];
+        }
+    }
+
+    /* vec_phs[2 - nc] walks fx, fy, fz, matching vec_c[0], vec_c[1], vec_c[2] */
+    value = vec_inp[0][0][0];
+    for (nc = 0; nc < 3; nc++)
+        value += vec_c[nc] * vec_phs[2 - nc];
+
+    return MAT_CLAMP(value, 0.0, 1.0);
+}
+
+/*
+ * Cubic interpolation through four equally spaced samples (Catmull-Rom form).
+ * The curve runs from vec_inp[1] at phs = 0 to vec_inp[2] at phs = 1;
+ * vec_inp[0] and vec_inp[3] only shape the slope at the two ends.
+ */
+MATFLOAT mat_cubic(MATFLOAT vec_inp[4], MATFLOAT phs)
+{
+    return vec_inp[1] + 0.5 * phs * (vec_inp[2] - vec_inp[0] +
+        phs * (2.0 * vec_inp[0] - 5.0 * vec_inp[1] + 4.0 * vec_inp[2] - vec_inp[3] +
+        phs * (3.0 * (vec_inp[1] - vec_inp[2]) + vec_inp[3] - vec_inp[0])));
+}
+
+/*
+ * Error between two vectors of 'size' elements.
+ * Despite the name no mean is taken: the result is the square root of the
+ * sum of the squared element differences.
+ */
+MATFLOAT mat_mse(MATFLOAT val1[], MATFLOAT val2[], int size)
+{
+    MATFLOAT sum_sq = 0.0;
+    int idx = 0;
+
+    while (idx < size) {
+        const MATFLOAT diff = val1[idx] - val2[idx];
+
+        sum_sq += diff * diff;
+        idx++;
+    }
+
+    return mat_sqrt(sum_sq);
+}
+
+/*
+ * S-shaped curve built from two power segments that meet at (0.5, 0.5):
+ *   val <= 0.5 :       k * val^gamma
+ *   val >  0.5 : 1.0 - k * (1.0 - val)^gamma
+ * with k = 0.5 * 0.5^(-gamma).
+ */
+MATFLOAT mat_sshape(MATFLOAT val, MATFLOAT gamma)
+{
+    const MATFLOAT k = 0.5 * mat_pow(0.5, -gamma);
+
+    if (val <= 0.5)
+        return k * mat_pow(val, gamma);
+
+    return 1.0 - k * mat_pow((1.0 - val), gamma);
+}
+
+/* Euclidean distance between vec_val and vec_org over 'size' elements. */
+MATFLOAT mat_radius_vec(MATFLOAT vec_val[], MATFLOAT vec_org[], int size)
+{
+    MATFLOAT sum_sq = 0.0;
+    int idx;
+
+    for (idx = 0; idx < size; idx++) {
+        sum_sq += (vec_val[idx] - vec_org[idx]) * (vec_val[idx] - vec_org[idx]);
+    }
+
+    return mat_sqrt(sum_sq);
+}
+
+/*
+ * Scale the distance of a point from an origin:
+ *   vec_out = vec_org + (vec_inp - vec_org) * gain
+ *
+ * vec_inp  input point
+ * vec_out  scaled point (may be the same array as vec_inp)
+ * vec_org  origin the gain is applied around
+ * size     number of elements in each of the three vectors
+ * gain     scale factor (1.0 leaves the point unchanged)
+ */
+void mat_gain_vec(MATFLOAT vec_inp[], MATFLOAT vec_out[], MATFLOAT vec_org[], int size, MATFLOAT gain)
+{
+    int ni;
+
+    /* honour the caller's length instead of assuming 3 elements */
+    for (ni = 0; ni < size; ni++)
+        vec_out[ni] = vec_org[ni] + (vec_inp[ni] - vec_org[ni]) * gain;
+}
+
+/*
+ * Return pi: a literal constant by default, or acos(-1.0) from the C math
+ * library when GM_MAT_MATH is defined.
+ */
+MATFLOAT mat_get_pi(void)
+{
+#ifndef GM_MAT_MATH
+    return 3.14159265358979323;
+#else
+    return (MATFLOAT)acos(-1.0);
+#endif
+}
+
+/* Angle of the point (x, y) in radians, passed through mat_norm_angle(). */
+MATFLOAT mat_angle(MATFLOAT y, MATFLOAT x)
+{
+    const MATFLOAT raw_angle = mat_atan2(y, x);
+
+    return mat_norm_angle(raw_angle);
+}
+
+/* Distance of the point (x, y) from the origin. */
+MATFLOAT mat_radius(MATFLOAT y, MATFLOAT x)
+{
+    const MATFLOAT sum_sq = y * y + x * x;
+
+    return mat_sqrt(sum_sq);
+}
+
+/* val0 raised to the power val1; thin wrapper around the C library pow(). */
+MATFLOAT mat_pow(MATFLOAT val0, MATFLOAT val1)
+{
+    const MATFLOAT result = (MATFLOAT)pow(val0, val1);
+
+    return result;
+}
+
+/* Four-quadrant arctangent of y / x; thin wrapper around the C library atan2(). */
+MATFLOAT mat_atan2(MATFLOAT y, MATFLOAT x)
+{
+    const MATFLOAT result = (MATFLOAT)atan2(y, x);
+
+    return result;
+}
+
+/* Cosine of val (radians); thin wrapper around the C library cos(). */
+MATFLOAT mat_cos(MATFLOAT val)
+{
+    const MATFLOAT result = (MATFLOAT)cos(val);
+
+    return result;
+}
+
+/* Sine of val (radians); thin wrapper around the C library sin(). */
+MATFLOAT mat_sin(MATFLOAT val)
+{
+    const MATFLOAT result = (MATFLOAT)sin(val);
+
+    return result;
+}
+
+/* Base-2 logarithm, computed as mat_log(val) / mat_log(2.0). */
+MATFLOAT mat_log2(MATFLOAT val)
+{
+    const MATFLOAT ln_val = mat_log(val);
+
+    return (MATFLOAT)(ln_val / mat_log(2.0));
+}
+
+/* Base-10 logarithm, computed as mat_log(val) / mat_log(10.0). */
+MATFLOAT mat_log10(MATFLOAT val)
+{
+    const MATFLOAT ln_val = mat_log(val);
+
+    return (MATFLOAT)(ln_val / mat_log(10.0));
+}
+
+/*
+ * Split val into a normalized fraction (returned) and a power-of-two
+ * exponent (stored in *exponent); thin wrapper around the C library frexp().
+ */
+MATFLOAT mat_frexp(MATFLOAT val, int *exponent)
+{
+    const MATFLOAT fraction = (MATFLOAT)frexp(val, exponent);
+
+    return fraction;
+}
+
+#ifndef GM_MAT_MATH
+/*
+ * Seed mantissas for mat_fast_rsqrt(). The table is indexed with 7 bits taken
+ * from bits 17..23 of the float: the 6 most significant mantissa bits plus
+ * the least significant exponent bit, which selects the upper or lower half.
+ */
+static const unsigned char root_recip_table[128] = {
+    0x69, 0x66, 0x63, 0x61, 0x5E, 0x5B, 0x59, 0x57, /* for x =(2.0 ... 3.99)*(4^n) */
+    0x54, 0x52, 0x50, 0x4D, 0x4B, 0x49, 0x47, 0x45, /* (exponent is even) */
+    0x43, 0x41, 0x3F, 0x3D, 0x3B, 0x39, 0x37, 0x36,
+    0x34, 0x32, 0x30, 0x2F, 0x2D, 0x2C, 0x2A, 0x28,
+    0x27, 0x25, 0x24, 0x22, 0x21, 0x1F, 0x1E, 0x1D,
+    0x1B, 0x1A, 0x19, 0x17, 0x16, 0x15, 0x14, 0x12,
+    0x11, 0x10, 0x0F, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
+    0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
+    0xFE, 0xFA, 0xF6, 0xF3, 0xEF, 0xEB, 0xE8, 0xE4, /* for x =(1.0 ... 1.99)*(4^n) */
+    0xE1, 0xDE, 0xDB, 0xD7, 0xD4, 0xD1, 0xCE, 0xCB, /* (exponent is odd) */
+    0xC9, 0xC6, 0xC3, 0xC0, 0xBE, 0xBB, 0xB8, 0xB6,
+    0xB3, 0xB1, 0xAF, 0xAC, 0xAA, 0xA8, 0xA5, 0xA3,
+    0xA1, 0x9F, 0x9D, 0x9B, 0x99, 0x97, 0x95, 0x93,
+    0x91, 0x8F, 0x8D, 0x8B, 0x89, 0x87, 0x86, 0x84,
+    0x82, 0x80, 0x7F, 0x7D, 0x7B, 0x7A, 0x78, 0x77,
+    0x75, 0x74, 0x72, 0x71, 0x6F, 0x6E, 0x6C, 0x6B
+};
+
+/*
+ * Find the reciprocal of the square root of val.
+ * An approximation is found, using the 6 MSBs of the mantissa,
+ * and the LSB of the exponent.
+ * The exponent mapping is a bit trickier than for a plain reciprocal:
+ * we want
+ *    125,126 -> 127
+ *    127,128 -> 126
+ *    129,130 -> 125
+ *    131,132 -> 124
+ *
+ * So, we can take original exponent, add 131, then >>1, then
+ * take the 1's complement.
+ * The result is accurate +/- 1 lsb in float precision. I'm not
+ * sure exactly what the full range of this is, it should
+ * work for any values >0, except for denormals.
+ *
+ * The sign of val is discarded, so a negative input yields the
+ * result for its absolute value.
+ *
+ * iterative method:
+ * Cavanagh, J. 1984. Digital Computer Arithmetic. McGraw-Hill. Page 278.
+ */
+float mat_fast_rsqrt(float val)
+{
+    /* gives access to the raw bit pattern of the float */
+    union {
+        float fval;
+        unsigned int uval;
+    } u;
+    unsigned int new_mant;
+    float rsqa, rprod;
+
+    u.fval = val;
+    u.uval &= 0x7FFFFFFF;        /* can't have sign */
+    /* from here on val holds |val| / 2, the factor used by the refinement steps */
+    val = u.fval * 0.5f;
+
+    /* table index: bits 17..23 = 6 mantissa MSBs + exponent LSB */
+    new_mant = root_recip_table[(u.uval >> 17) & 0x7F];
+    /*
+     * create modified exponent    ; drop in new mantissa
+     */
+    u.uval = (~((u.uval + 0x41800000) >> 1) & 0x7F800000) + (new_mant << 15);
+    rsqa = u.fval;
+    /*
+     * note: we could do
+     *  rsqa *= 1.5f - rsqa*rsqa * x
+     * but there are cases where x is very small
+     * (zero or denormal) and rsqa*rsqa could overflow. We generate
+     * the wrong answer in these cases, but at least it isn't a NaN.
+     */
+    /* three refinement steps of the form r = r * (1.5 - (x / 2) * r * r) */
+    rprod = val * rsqa;
+    rsqa *= 1.5f - rprod * rsqa;
+    rprod = val * rsqa;
+    rsqa *= 1.5f - rprod * rsqa;
+    rprod = val * rsqa;
+    rsqa *= 1.5f - rprod * rsqa;
+
+    return rsqa;
+}
+
+/* Function body that returns the float whose IEEE-754 bit pattern is cnst. */
+#define Declare_Special_Float(cnst) { union { unsigned int ui; float f; } u; u.ui = (cnst); return u.f; }
+float FLT_INF(void);
+float FLT_MINF(void);
+float FLT_NAN(void);
+/* +infinity, -infinity and a NaN, built from their single-precision bit patterns */
+float FLT_INF(void) Declare_Special_Float(0x7F800000)
+float FLT_MINF(void) Declare_Special_Float(0xFF800000)
+float FLT_NAN(void) Declare_Special_Float(0x7F800001)
+/*
+ * table below is
+ * a = log(x+1), b = exp(-a);
+ * comment shows range of x to which each line applies.
+ */
+static const float log_tab[64] = {
+    0.000000000f,   1.000000000f,  /* 0 to  0.0111657 */
+    0.022311565f,   0.977935498f,  /* ... to  0.0340233 */
+    0.044580154f,   0.956398938f,  /* ... to  0.0572837 */
+    0.066807851f,   0.935374915f,  /* ... to  0.0810282 */
+    0.089004092f,   0.914841830f,  /* ... to  0.1052765 */
+    0.111178130f,   0.894779348f,  /* ... to  0.1300487 */
+    0.133338988f,   0.875168370f,  /* ... to  0.1553661 */
+    0.155495435f,   0.855990985f,  /* ... to  0.1812505 */
+    0.177655950f,   0.837230423f,  /* ... to  0.2077248 */
+    0.199828684f,   0.818871027f,  /* ... to  0.2348125 */
+    0.222021341f,   0.800898272f,  /* ... to  0.2625375 */
+    0.244241118f,   0.783298744f,  /* ... to  0.2909245 */
+    0.266494602f,   0.766060139f,  /* ... to  0.3199984 */
+    0.288787603f,   0.749171310f,  /* ... to  0.3497841 */
+    0.311125100f,   0.732622219f,  /* ... to  0.3803064 */
+    0.333510906f,   0.716404086f,  /* ... to  0.4115894 */
+    0.355947524f,   0.700509379f,  /* ... to  0.4436560 */
+    0.378435910f,   0.684931867f,  /* ... to  0.4765275 */
+    0.400975198f,   0.669666670f,  /* ... to  0.5102230 */
+    0.423562229f,   0.654710433f,  /* ... to  0.5447579 */
+    0.446191430f,   0.640061233f,  /* ... to  0.5801435 */
+    0.468854219f,   0.625718795f,  /* ... to  0.6163859 */
+    0.491538733f,   0.611684450f,  /* ... to  0.6534842 */
+    0.514229417f,   0.597961196f,  /* ... to  0.6914296 */
+    0.536906660f,   0.584553682f,  /* ... to  0.7302038 */
+    0.559546530f,   0.571468149f,  /* ... to  0.7697776 */
+    0.582120657f,   0.558712272f,  /* ... to  0.8101096 */
+    0.604596078f,   0.546295042f,  /* ... to  0.8511456 */
+    0.626935601f,   0.534226378f,  /* ... to  0.8928175 */
+    0.649098098f,   0.522516823f,  /* ... to  0.9350435 */
+    0.671039402f,   0.511176983f,  /* ... to  0.9777287 */
+    0.693147182f,   0.500000000f,  /* ....to  0.9999999 */
+};
+
+/*
+ * FAST LN function
+ *
+ * (1) split the number into its base-2 exponent 'e', and
+ *   a mantissa 'xm' in range 1.0 .. 1.99999
+ *
+ * (2) using a cubic, find y0 = approx. ln(xm)
+ * (3) scale this, round it to a table index 0...31.
+ *   From the table, get a log value, (which will be added to the result)
+ *   and a scale factor.
+ *   Multiply xm by the scale factor, result xe is very close to 1.
+ *
+ * (4) find ye = log(xe) using a taylor series around xe=1
+ * (5) result is yt+ye+log(2)*exp, where yt is from the table (1st col)
+ * and exp is the original exponent.
+ * Note that multiplying the input by the second column of the table,
+ * and adding the 1st column of the table to the result, has no net effect.
+ *
+ * Special inputs:
+ *   +0.0, -0.0 and denormals (exponent field 0)   -> -infinity
+ *   negative numbers, infinities and NaN          -> NaN
+ */
+float mat_fast_log(float x)
+{
+    union {
+        float f;
+        unsigned int ui;
+    } u;
+    float xm1, xe, ye;
+    unsigned int biased_exp;
+    int tabind;
+    int ex;
+
+    u.f = x;
+    biased_exp = (u.ui >> 23) & 0xFF;
+    if (biased_exp == 0)
+        return FLT_MINF();    /* was 0.0 or -0.0 (or denormal) */
+    if (((u.ui >> 31) != 0) || (biased_exp == 0xFF))
+        return FLT_NAN();     /* negative, infinite or NaN */
+    ex = (int)biased_exp - 127;
+    /*
+     * force the exponent field to the bias (sign is known to be clear):
+     * now u.f is in range 1.0 ... 1.99999
+     */
+    u.ui = (u.ui & 0x007FFFFFu) | 0x3F800000u;
+    xm1 = u.f - 1.0f;        /* 0. 1.0 */
+    /*
+     * The table above and the cubic below were generated together.
+     * The scaled cubic is never negative here, so adding 0.5 and
+     * truncating rounds it to the nearest integer.
+     */
+    tabind = (int)(((xm1 * 0.1328047513f - 0.4396575689f) * xm1 * xm1 + xm1) * 44.75f + 0.5f);
+    /*
+     * tabind is in range 0..31.
+     * multiply u.f by the second value in the table, subtract 1
+     */
+    xe = u.f * log_tab[2 * tabind + 1] - 1.0f;    /* result is  +/- .0114 */
+
+    /*
+     * find the log(xe+1) using taylor series; add to (a) amount from exponent
+     * (b) amount from table
+     */
+    ye = ((-0.25f * xe + 0.333333333f) * xe - 0.5f) * xe * xe;
+    ye += xe;
+    return  0.693147182f * (float)ex + log_tab[2 * tabind] + ye;
+}
+
+/*
+ * Lookup table for mat_fast_exp(), indexed with the low four bits of the
+ * rounded, scaled argument. The entries carry a 1/6 factor because the
+ * polynomial they are multiplied with evaluates 6 * exp(f).
+ */
+static const float exp_table[16] =
+{
+    /* (1/6) * 2^(i/16.), to float precision */
+    0.166666672f,  0.174045637f,  0.181751296f,  0.189798102f,
+    0.198201180f,  0.206976309f,  0.216139928f,  0.225709260f,
+    0.235702261f,  0.246137694f,  0.257035136f,  0.268415064f,
+    0.280298799f,  0.292708695f,  0.305668026f,  0.319201082f
+};
+
+/*
+ * FAST_EXP does an exponential function.
+ * This is done using a table lookup to
+ * get close and a taylor series to
+ * get accurate.
+ *
+ * if y = exp(x) = (2^m)*(P^n)*exp(f),   where P = 2^(1/16),
+ *
+ * then x = ln(2^m)  + ln(P^n) + f
+ *        = ln(P^(16*m+n)) + f
+ *        = ln(P) * [ 16*m +n ] +f
+ * let k = ln(P) = ln(2)/16 = 0.043321698785
+ *
+ * so x = k*[16*m + n] + f
+ *
+ * For a given x, we find m,n,f such that:
+ *   m is an integer
+ *   n is in integer 0..15
+ *   f is as close to zero as possible: +/- k/2
+ *
+ * Then we find y = (2^m)*(P^n)*exp(f)
+ *
+ *  where 2^m is an exponent adjustment, P^n is a table lookup
+ *  and exp(f) is calculated. The 4th term in the series
+ *  for exp(f) is at most k^4/(16*24) = 9.17e-9, so we only
+ *  need to do up to the 3rd order.
+ *
+ * One more quirk:
+ *  exp(f) is evaluated via
+ *  6*exp(f) = ((f + 3)*f + 6)*f + 6
+ *
+ * To compensate, the numbers in the P^n table are really 1/6 as
+ * big as they should be.
+ *
+ * Example: exp(13.2)
+ *   13.2 * (1/k) = 304.697, round to 305 => m*16+n = 305
+ *   f = 13.2 - k * 305 = -0.013118
+ *   m = 19, n = 1
+ *
+ *   6*exp(f) = ((f + 3)*f + 6)*f + 6 = 5.921805
+ *   exp_table[n] * (6*exp(f)) = .174046 * 5.921805 = 1.030664
+ *    multiply that by 2^m (=5.24288e5)  -> 5.40365e5
+ *
+ */
+/*
+ * Fast single-precision exp(x): table lookup plus a cubic polynomial.
+ *
+ * x is split as x = k * n + f with k = ln(2)/16 and n an integer; the low
+ * four bits of n select the exp_table entry, the remaining bits of n become
+ * a power-of-two exponent adjustment and exp(f) comes from the polynomial.
+ *
+ * Returns 0.0f when the result would underflow and +infinity when it would
+ * overflow; a NaN input is returned unchanged.
+ */
+float mat_fast_exp(float x)
+{
+    int m, n, tab_idx;
+    float scaled;
+    union {
+        unsigned ui;
+        float f;
+    } u;
+
+    scaled = x * 23.08312065f;        /* 16/log(2) */
+    /*
+     * reject NaN and far out-of-range values while still in float, so
+     * the conversion to int below is always defined
+     */
+    if (scaled != scaled)
+        return x;
+    if (scaled <= -2016.0f)
+        return 0.0f;
+    if (scaled >= 2048.0f)
+        return FLT_INF();
+    /*
+     * round to nearest for both signs: adding 0.5 and truncating would
+     * round negative values toward zero and leave f outside +/- k/2
+     */
+    n = (int)((scaled < 0.0f) ? (scaled - 0.5f) : (scaled + 0.5f));
+    /*
+     * range check on n now
+     */
+    if ((n <= -2016) || (n >= 2048)) {
+        if (n < 0)
+            return 0.0f;
+        else
+            return FLT_INF();
+    }
+    x -= (float)n * 0.043321698785f;    /* log(2)/16. */
+
+    /* split n = 16 * m + tab_idx without shifting a negative value */
+    tab_idx = n & 15;
+    m = (n - tab_idx) / 16;
+    x = ((x + 3.0f) * x + 6.0f) * x + 6.0f;    /* 6 * exp(f) */
+    u.f = x * exp_table[tab_idx];
+    u.ui += (unsigned)m << 23;    /* exponent adjust (modulo 2^32) */
+
+    return u.f;
+}
+#endif
+
+/*
+ * Square root of val.
+ * By default this is the reciprocal of mat_fast_rsqrt(), which works in
+ * single precision and discards the sign of val; with GM_MAT_MATH defined
+ * the C library sqrt() is used instead.
+ */
+MATFLOAT mat_sqrt(MATFLOAT val)
+{
+#ifdef GM_MAT_MATH
+    return (MATFLOAT)sqrt(val);
+#else
+    return 1.0 / (MATFLOAT)mat_fast_rsqrt((float)val);
+#endif
+}
+
+/*
+ * Natural (base e) logarithm of val: the single-precision mat_fast_log() by
+ * default, or the C library log() when GM_MAT_MATH is defined.
+ */
+MATFLOAT mat_log(MATFLOAT val)
+{
+#ifndef GM_MAT_MATH
+    return (MATFLOAT)mat_fast_log((float)val);
+#else
+    return (MATFLOAT)log(val);
+#endif
+}
+
+/*
+ * e raised to the power val: the single-precision mat_fast_exp() by default,
+ * or the C library exp() when GM_MAT_MATH is defined.
+ */
+MATFLOAT mat_exp(MATFLOAT val)
+{
+#ifndef GM_MAT_MATH
+    return (MATFLOAT)mat_fast_exp((float)val);
+#else
+    return (MATFLOAT)exp(val);
+#endif
+}
+
+/*
+ * Flatten a (r, g, b) node coordinate of a cubic 3D LUT with num_pnts nodes
+ * per axis into a linear index.
+ *
+ * MAT_ORDER_RGB: red is the fastest-varying axis, blue the slowest.
+ * MAT_ORDER_BGR (and any other value): blue is the fastest-varying axis,
+ * red the slowest. Green is always the middle axis.
+ */
+unsigned int mat_index_3dlut(int ind_r, int ind_g, int ind_b, int num_pnts, enum mat_order_3dlut order)
+{
+    int slowest, fastest;
+
+    if (order == MAT_ORDER_RGB) {
+        slowest = ind_b;
+        fastest = ind_r;
+    } else {
+        slowest = ind_r;
+        fastest = ind_b;
+    }
+
+    return (unsigned int)((slowest * num_pnts + ind_g) * num_pnts + fastest);
+}
diff --git a/src/amd/gmlib/gm/mat_funcs.h b/src/amd/gmlib/gm/mat_funcs.h
new file mode 100755
index 00000000000..43e793b44ca
--- /dev/null
+++ b/src/amd/gmlib/gm/mat_funcs.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ *----------------------------------------------------------------------
+ * File Name  : mat_funcs.h
+ * Purpose    : Mathematical functions
+ * Author     : Vladimir Lachine (vlachine@amd.com)
+ * Date       : September 20, 2023
+ * Version    : 1.2
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#define MATFLOAT double
+
+/* precision for matrix inversion */
+#define PRECISION_LIMIT (1.0e-15)
+
+/* absolute value of a */
+#define MAT_ABS(a)        (((a) < 0) ? -(a) : (a))
+
+/* find minimum of a and b */
+#define MAT_MIN(a, b)        (((a) < (b)) ? (a) : (b))
+
+/* find maximum of a and b */
+#define MAT_MAX(a, b)        (((a) > (b)) ? (a) : (b))
+
+/* clip to range */
+#define MAT_CLAMP(v, l, h)    ((v) < (l) ? (l) : ((v) > (h) ? (h) : v))
+
+/* round a to nearest int */
+#define MAT_ROUND(a)        (int)((a) + 0.5f)
+
+/* take sign of a, either -1, 0, or 1 */
+#define MAT_ZSGN(a)        (((a) < 0) ? -1 : (a) > 0 ? 1 : 0)
+
+/* take binary sign of a, either -1, or 1 if >= 0 */
+#define MAT_SGN(a)        (((a) < 0) ? -1 : 1)
+
+/* swap a and b (see Gem by Wyvill) */
+#define MAT_SWAP(a, b)    { a ^ = b; b ^ = a; a ^= b; }
+
+/* linear interpolation from l (when a=0) to h (when a=1) */
+/* (equal to (a*h)+((1-a)*l) */
+#define MAT_LERP(a, l, h)    ((l) + (((h) - (l)) * (a)))
+
+/* vector operations */
+void mat_eval_3x3(MATFLOAT mat[3][3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3]);
+void mat_eval_3x3_off(MATFLOAT mat[3][3], MATFLOAT vec_off[3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3]);
+void mat_eval_off_3x3_off(MATFLOAT vec_off_inp[3], MATFLOAT mat[3][3],
+    MATFLOAT vec_off_out[3], MATFLOAT vec_inp[3], MATFLOAT vec_out[3]);
+void mat_mul3x3(MATFLOAT mat2[3][3], MATFLOAT mat1[3][3], MATFLOAT mat2x1[3][3]);
+int mat_inv3x3(MATFLOAT mat_inp[3][3], MATFLOAT mat_out[3][3]);
+
+void mat_3x1_zero(MATFLOAT vec_out[3]);
+void mat_3x3_zero(MATFLOAT mat_out[3][3]);
+void mat_3x3_unity(MATFLOAT mat_out[3][3]);
+void mat_copy3x3(MATFLOAT mat_inp[3][3], MATFLOAT mat_out[3][3]);
+
+int mat_round(MATFLOAT val);
+
+MATFLOAT mat_int2flt(int val, int val_max);
+int mat_flt2int(MATFLOAT val, int val_max);
+
+void mat_gen_mat_off(MATFLOAT mat_inp[3][3], MATFLOAT vec_off_inp[3],
+    MATFLOAT vec_off_out[3], MATFLOAT mat_res[3][3], MATFLOAT vec_off_res[3]);
+void mat_scl_off(MATFLOAT vec_off_inp[3], MATFLOAT vec_off_out[3], int bitwidth);
+void mat_cvt_cs(int vec_inp[3], int vec_out[3], int bitwidth, MATFLOAT mat[3][3], MATFLOAT vec_off[3], int is_clip);
+
+MATFLOAT mat_norm_angle(MATFLOAT angle);
+
+MATFLOAT mat_clamp(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_max);
+int mat_is_valid(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_max);
+int mat_is_valid_vec(MATFLOAT val_inp[], int size, MATFLOAT val_min, MATFLOAT val_max);
+int mat_is_number(MATFLOAT val);
+MATFLOAT mat_norm(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_rng);
+MATFLOAT mat_denorm(MATFLOAT val_inp, MATFLOAT val_min, MATFLOAT val_rng);
+
+void mat_copy(MATFLOAT vec_inp[], MATFLOAT vec_out[], int size);
+void mat_set(MATFLOAT val_inp, MATFLOAT vec_out[], int size);
+
+int mat_flt_to_index(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts);
+MATFLOAT mat_index_to_flt(int index, MATFLOAT val_max, int num_pnts);
+MATFLOAT mat_flt_to_index_phase(MATFLOAT val_inp, MATFLOAT val_max, int num_pnts, int vec_ind[2]);
+MATFLOAT mat_vec_to_index_phase(MATFLOAT val_inp, MATFLOAT vec_val[], int num_pnts, int vec_ind[2]);
+
+int mat_int_to_index(int val_inp, int val_max, int num_indexes);
+int mat_index_to_int(int index, int val_max, int num_indexes);
+MATFLOAT mat_int_to_index_phase(int val_inp, int val_max, int num_indexes, int vec_val_ind[2]);
+int mat_get_hue_index_2pi(MATFLOAT vec_hue[], int num_hue_pnts);
+MATFLOAT mat_hue_to_index_phase(MATFLOAT val_inp, int num_hue_pnts,
+    MATFLOAT vec_val[], MATFLOAT val_max, int index_max, int vec_ind_out[2]);
+
+int mat_seg_intersection(MATFLOAT p0_xy[2], MATFLOAT p1_xy[2],
+    MATFLOAT p2_xy[2], MATFLOAT p3_xy[2], MATFLOAT p_xy[2]);
+
+MATFLOAT mat_linear(MATFLOAT vec_inp[2], MATFLOAT phs);
+MATFLOAT mat_bilinear(MATFLOAT vec_inp[2][2], MATFLOAT vec_phs[2]);
+MATFLOAT mat_trilinear(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3]);
+MATFLOAT mat_tetra(MATFLOAT vec_inp[2][2][2], MATFLOAT vec_phs[3]);
+MATFLOAT mat_cubic(MATFLOAT vec_inp[4], MATFLOAT phs);
+
+MATFLOAT mat_mse(MATFLOAT val1[], MATFLOAT val2[], int size);
+MATFLOAT mat_sshape(MATFLOAT val, MATFLOAT gamma);
+MATFLOAT mat_get_pi(void);
+
+MATFLOAT mat_angle(MATFLOAT y, MATFLOAT x);
+MATFLOAT mat_radius(MATFLOAT y, MATFLOAT x);
+MATFLOAT mat_radius_vec(MATFLOAT val[], MATFLOAT org[], int size);
+void mat_gain_vec(MATFLOAT vec_inp[], MATFLOAT vec_out[], MATFLOAT vec_org[], int size, MATFLOAT gain);
+
+MATFLOAT mat_pow(MATFLOAT val0, MATFLOAT val1);
+MATFLOAT mat_atan2(MATFLOAT y, MATFLOAT x);
+MATFLOAT mat_cos(MATFLOAT val);
+MATFLOAT mat_sin(MATFLOAT val);
+MATFLOAT mat_sqrt(MATFLOAT val);
+MATFLOAT mat_log(MATFLOAT val);
+MATFLOAT mat_log2(MATFLOAT val);
+MATFLOAT mat_log10(MATFLOAT val);
+MATFLOAT mat_frexp(MATFLOAT val, int *exponent);
+
+#ifndef GM_MAT_MATH
+float mat_fast_rsqrt(float val);
+float mat_fast_exp(float x);
+#endif
+
+MATFLOAT mat_exp(MATFLOAT val);
+
+/* Memory layout of a 3D LUT, as consumed by mat_index_3dlut(). */
+enum mat_order_3dlut {
+    MAT_ORDER_RGB = 0,  /* red index varies fastest, blue slowest */
+    MAT_ORDER_BGR = 1   /* blue index varies fastest, red slowest */
+};
+
+unsigned int mat_index_3dlut(int ind_r, int ind_g, int ind_b, int num_pnts, enum mat_order_3dlut order);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/amd/gmlib/meson.build b/src/amd/gmlib/meson.build
new file mode 100755
index 00000000000..4098b5f8786
--- /dev/null
+++ b/src/amd/gmlib/meson.build
@@ -0,0 +1,65 @@
+# Copyright 2022 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: MIT
+
+c_args_gm = cc.get_supported_arguments([  # keeps only the flags the C compiler accepts
+  '-Wall',
+  '-Wextra',
+  '-Wno-unused',
+  '-Wno-unused-parameter',
+  '-Wno-unused-command-line-argument',
+  '-Wno-ignored-qualifiers',
+  '-Wno-missing-field-initializers',
+  '-Wno-self-assign',
+  '-Wno-implicit-fallthrough',
+  '-Werror=comment',  # the -Werror=<name> entries turn just those warnings into errors
+  '-Werror=missing-braces',
+  '-Werror=override-init',
+  '-Werror=enum-conversion',
+  '-Werror=enum-compare',
+  '-Werror=maybe-uninitialized',
+])
+
+c_args_gm += [
+  '-DGM_SIM',  # appended directly, so it is always passed regardless of the probe above
+]
+
+gm_files = files(  # sources and headers compiled into the libgm static library
+  'tonemap_adaptor.h',
+  'tonemap_adaptor.c',
+  'gm/csc_api_funcs.c',
+  'gm/csc_api_funcs.h',
+  'gm/csc_funcs.c',
+  'gm/csc_funcs.h',
+  'gm/cs_funcs.c',
+  'gm/cs_funcs.h',
+  'gm/cvd_api_funcs.c',
+  'gm/cvd_api_funcs.h',
+  'gm/cvd_funcs.c',
+  'gm/cvd_funcs.h',
+  'gm/gm_api_funcs.c',
+  'gm/gm_api_funcs.h',
+  'gm/gm_funcs.c',
+  'gm/gm_funcs.h',
+  'gm/mat_funcs.c',
+  'gm/mat_funcs.h',
+  'ToneMapGenerator/inc/ToneMapGenerator.h',
+  'ToneMapGenerator/inc/ToneMapTypes.h',
+  'ToneMapGenerator/src/inc/AGMGenerator.h',
+  'ToneMapGenerator/src/inc/CSCGenerator.h',
+  'ToneMapGenerator/src/src/AGMGenerator.c',
+  'ToneMapGenerator/src/src/ToneMapGenerator.c',
+)
+
+inc_amd_gm = include_directories(
+  'gm',
+  'ToneMapGenerator/inc',      # lets tonemap_adaptor.c include "ToneMapGenerator.h" by bare name
+  'ToneMapGenerator/src/inc',  # lets tonemap_adaptor.c include "AGMGenerator.h" by bare name
+)
+
+libgm = static_library(
+  'gm',  # Meson adds the 'lib' prefix and '.a' suffix itself, producing libgm.a
+  gm_files,
+  install : false,
+  c_args : c_args_gm,
+  include_directories : inc_amd_gm
+)
diff --git a/src/amd/gmlib/tonemap_adaptor.c b/src/amd/gmlib/tonemap_adaptor.c
new file mode 100755
index 00000000000..825c0363b94
--- /dev/null
+++ b/src/amd/gmlib/tonemap_adaptor.c
@@ -0,0 +1,78 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include "ToneMapGenerator.h"
+#include "AGMGenerator.h"
+#include "tonemap_adaptor.h"
+
+static void VPEFree3DLut(void* memToFree, void* pDevice)
+{
+    free(memToFree); /* counterpart of VPEAlloc3DLut()'s calloc(); pDevice is unused */
+}
+
+static void* VPEAlloc3DLut(unsigned int allocSize, void* pDevice)
+{
+    return calloc(1, allocSize); /* zero-filled heap block of allocSize bytes, NULL on failure; pDevice is unused */
+}
+
+/* Allocates a zero-filled ToneMapGenerator and selects the TMG_A_AGM algorithm.
+ * Returns NULL if the allocation fails; release the handle with tm_destroy(). */
+void* tm_create(void)
+{
+    struct ToneMapGenerator* generator = calloc(1, sizeof(*generator));
+    if (generator == NULL)
+        return NULL;
+    generator->tmAlgo = TMG_A_AGM;
+    generator->memAllocSet = false;
+    generator->agmGenerator.initalized = false;
+    return generator;
+}
+
+/* Calls AGMGenerator_Exit() on the generator's embedded AGM state, frees the
+ * generator and sets *pp_tmGenerator to NULL. Does nothing when pp_tmGenerator
+ * or *pp_tmGenerator is NULL. */
+void tm_destroy(void** pp_tmGenerator)
+{
+    struct ToneMapGenerator* generator = pp_tmGenerator ? *pp_tmGenerator : NULL;
+
+    if (generator == NULL)
+        return;
+    AGMGenerator_Exit(&generator->agmGenerator);
+    free(generator);
+    *pp_tmGenerator = NULL;
+}
+
+/* Runs the tone-map generator stored in pInparam->tm_handle, handing it
+ * pformattedLutData as the uint16_t lutData pointer. Returns the generator's
+ * TMGReturnCode as an int, or -1 if pInparam or its tm_handle is NULL. */
+int tm_generate3DLut(struct tonemap_param* pInparam, void* pformattedLutData)
+{
+    struct ToneMapGenerator*     generator;
+    struct ToneMappingParameters tmParams;
+
+    if (!pInparam || !pInparam->tm_handle)
+        return -1;
+    generator = (struct ToneMapGenerator*)pInparam->tm_handle;
+
+    /* Zero every other field so the generator never reads indeterminate values. */
+    memset(&tmParams, 0, sizeof(tmParams));
+    tmParams.lutData = (uint16_t *)pformattedLutData;
+
+    /* Internal allocations use the calloc/free wrappers; no device pointer is needed. */
+    ToneMapGenerator_SetInternalAllocators(generator, (TMGAlloc)(VPEAlloc3DLut),
+                                           (TMGFree)(VPEFree3DLut), NULL);
+
+    return (int)ToneMapGenerator_GenerateToneMappingParameters(
+                    generator, &pInparam->streamMetaData, &pInparam->dstMetaData,
+                    pInparam->inputContainerGamma, pInparam->outputContainerGamma,
+                    pInparam->outputContainerPrimaries, pInparam->lutDim,
+                    &tmParams);
+}
diff --git a/src/amd/gmlib/tonemap_adaptor.h b/src/amd/gmlib/tonemap_adaptor.h
new file mode 100755
index 00000000000..41777a24977
--- /dev/null
+++ b/src/amd/gmlib/tonemap_adaptor.h
@@ -0,0 +1,33 @@
+/* Copyright 2025 Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: MIT
+ *
+ * Authors: AMD
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "ToneMapGenerator/inc/ToneMapTypes.h"
+
+struct tonemap_param /* argument bundle for tm_generate3DLut() */
+{
+    void*                        tm_handle;                /* generator handle; cast to struct ToneMapGenerator* by tm_generate3DLut() */
+    struct ToneMapHdrMetaData    streamMetaData;           /* passed by address to the generator */
+    struct ToneMapHdrMetaData    dstMetaData;              /* passed by address to the generator */
+    enum ToneMapTransferFunction inputContainerGamma;      /* forwarded unchanged to the generator */
+    enum ToneMapTransferFunction outputContainerGamma;     /* forwarded unchanged to the generator */
+    enum ToneMapColorPrimaries   outputContainerPrimaries; /* forwarded unchanged to the generator */
+    unsigned short               lutDim;                   /* forwarded unchanged; NOTE(review): presumably LUT points per axis - confirm */
+};
+
+void* tm_create(void);                    /* new generator handle, or NULL if allocation fails */
+void  tm_destroy(void** pp_tmGenerator);  /* frees *pp_tmGenerator and sets it to NULL; NULL-safe */
+int   tm_generate3DLut(struct tonemap_param* pInparam, void* pformattedLutData); /* returns the generation status as an int; pformattedLutData is used as the generator's uint16_t lutData pointer */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
diff --git a/src/amd/meson.build b/src/amd/meson.build
index b0d9cb3c5bf..146f2bed97d 100644
--- a/src/amd/meson.build
+++ b/src/amd/meson.build
@@ -27,4 +27,5 @@ endif
 
 if with_gallium_radeonsi
   subdir('vpelib')
+  subdir('gmlib')  # gmlib: generates the 3D LUT radeonsi uses for tone mapping
 endif