mirror of
https://gitlab.freedesktop.org/wayland/weston.git
synced 2025-12-20 04:40:07 +01:00
color-lcms: optimize build_3d_lut()
Previously, inverse_evaluate_lut1d was called 3 * len^3 times. Now it is
called only 3 * len times with the help of pre-computed arrays for red
and green channels. The blue channel does not need an array, because it is
evaluated only once per iteration of the outermost loop, so it has no
redundant computations. This is a significant win.
Also, allocate a temporary array rgb_in, so cmsDoTransform() can be
called in batches of len triplets. This did not seem to be that big a win. I
tried running cmsDoTransform() over len^2 triplets, but that did not
seem to improve performance.
The test I used creates two 3D LUTs, hence two timings for a single run.
My representative timing test results per one 3D LUT:
- before: 16 ms and 19 ms
- after: 7 ms and 10 ms
The measurements were done with this patch:
static bool
xform_to_shaper_plus_3dlut(struct weston_color_transform *xform_base,
uint32_t len_shaper, float *shaper,
uint32_t len_lut3d, float *lut3d)
{
struct cmlcms_color_transform *xform = to_cmlcms_xform(xform_base);
struct weston_compositor *compositor = xform_base->cm->compositor;
bool ret;
+ struct timespec begin, end;
+ unsigned i;
- ret = build_shaper(xform->lcms_ctx, xform->cmap_3dlut,
+ clock_gettime(CLOCK_MONOTONIC, &begin);
+ for (i = 0; i < 100; i++)
+ ret = build_shaper(xform->lcms_ctx, xform->cmap_3dlut,
len_shaper, shaper);
if (!ret)
return false;
- ret = build_3d_lut(compositor, xform->cmap_3dlut,
+ for (i = 0; i < 100; i++)
+ ret = build_3d_lut(compositor, xform->cmap_3dlut,
len_shaper, shaper, len_lut3d, lut3d);
if (!ret)
return false;
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ fprintf(stderr, "%s: %" PRId64 " ms\n", __func__, timespec_sub_to_msec(&end, &begin));
return true;
}
Using this command:
$ ./tests/test-color-icc-output -f 8 opaque_pixel_conversion
Signed-off-by: Pekka Paalanen <pekka.paalanen@collabora.com>
This commit is contained in:
parent
f69bb08738
commit
d4e39210a3
1 changed file with 75 additions and 30 deletions
|
|
@ -1705,45 +1705,90 @@ build_3d_lut(struct weston_compositor *compositor, cmsHTRANSFORM cmap_3dlut,
|
|||
unsigned int len_shaper, const float *shaper,
|
||||
unsigned int len_lut3d, float *lut3d)
|
||||
{
|
||||
float divider = len_lut3d - 1;
|
||||
float rgb_in[3], rgb_out[3];
|
||||
uint32_t index, index_r, index_g, index_b;
|
||||
const float *curves[3];
|
||||
const float *const red_curve = &shaper[0];
|
||||
const float *const green_curve = &shaper[len_shaper];
|
||||
const float *const blue_curve = &shaper[2 * len_shaper];
|
||||
uint32_t index_r, index_g, index_b;
|
||||
uint32_t i;
|
||||
float *tmp;
|
||||
float *inverse_r;
|
||||
float *inverse_g;
|
||||
struct weston_vec3f *rgb_in;
|
||||
|
||||
curves[0] = &shaper[0];
|
||||
curves[1] = &shaper[len_shaper];
|
||||
curves[2] = &shaper[2 * len_shaper];
|
||||
/*
|
||||
* Ensure the indices and byte counts cannot overflow,
|
||||
* and memory usage does not get ridiculous. Arbitrary limit.
|
||||
*/
|
||||
weston_assert_u32_lt(compositor, len_lut3d, 100);
|
||||
|
||||
/*
|
||||
* A temporary allocation that holds two 1D LUTs of length len_lut3d
|
||||
* and one scratch array of vec3f of length len_lut3d.
|
||||
*/
|
||||
const uint32_t bytes_per_elem = 2 * sizeof (float) + sizeof *rgb_in;
|
||||
tmp = malloc(len_lut3d * bytes_per_elem);
|
||||
|
||||
inverse_r = &tmp[0];
|
||||
inverse_g = &tmp[len_lut3d];
|
||||
rgb_in = (struct weston_vec3f *)&tmp[2 * len_lut3d];
|
||||
|
||||
/*
|
||||
* For each channel, use the shaper to compute the value x such that
|
||||
* y(x) = index / (len - 1). As the shaper is a LUT, we find the closest
|
||||
* neighbors of such point (x, y) and then use linear interpolation to
|
||||
* estimate x.
|
||||
*/
|
||||
for (i = 0; i < len_lut3d; i++) {
|
||||
float y = (float)i / (len_lut3d - 1);
|
||||
inverse_r[i] = weston_inverse_evaluate_lut1d(compositor,
|
||||
len_shaper,
|
||||
red_curve,
|
||||
y);
|
||||
inverse_g[i] = weston_inverse_evaluate_lut1d(compositor,
|
||||
len_shaper,
|
||||
green_curve,
|
||||
y);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in the 3D LUT: LUT(Rin, Gin, Bin) = { Rout, Gout, Bout }
|
||||
* Each of Rin, Gin and Bin varies from 0.0 to 1.0. The range [0.0, 1.0]
|
||||
* is evenly divided into len_lut3d number of sampling points. The
|
||||
* indices of the sampling points are index_r, index_g, index_b.
|
||||
*
|
||||
* To compute { Rout, Gout, Bout }, first Rin, Gin, Bin must go through
|
||||
* the shaper 1D LUTs in reverse. This was pre-computed into
|
||||
* inverse_r and inverse_g above, and inverse_b is computed below.
|
||||
* This was done one dimension (channel) at a time, because they are
|
||||
* separable.
|
||||
*
|
||||
* The next step is not separable, so we iterate through all points in
|
||||
* the 3D volume. The points are transformed len_lut3d points at a time
|
||||
* (rgb_in array) to strike a balance between the number of function
|
||||
* calls and the memory requirements.
|
||||
*/
|
||||
for (index_b = 0; index_b < len_lut3d; index_b++) {
|
||||
float inverse_b = weston_inverse_evaluate_lut1d(compositor,
|
||||
len_shaper,
|
||||
blue_curve,
|
||||
(float)index_b / (len_lut3d - 1));
|
||||
for (i = 0; i < len_lut3d; i++)
|
||||
rgb_in[i].b = inverse_b;
|
||||
|
||||
for (index_g = 0; index_g < len_lut3d; index_g++) {
|
||||
for (index_r = 0; index_r < len_lut3d; index_r++) {
|
||||
/**
|
||||
* For each channel, use the shaper to compute
|
||||
* the value x such that y(x) = index / divider.
|
||||
* As the shapper is a LUT, we find the closest
|
||||
* neighbors of such point (x, y) and then use
|
||||
* linear interpolation to estimate x.
|
||||
*/
|
||||
rgb_in[0] = weston_inverse_evaluate_lut1d(compositor,len_shaper,
|
||||
curves[0],
|
||||
(float)index_r / divider);
|
||||
rgb_in[1] = weston_inverse_evaluate_lut1d(compositor, len_shaper,
|
||||
curves[1],
|
||||
(float)index_g / divider);
|
||||
rgb_in[2] = weston_inverse_evaluate_lut1d(compositor, len_shaper,
|
||||
curves[2],
|
||||
(float)index_b / divider);
|
||||
|
||||
cmsDoTransform(cmap_3dlut, rgb_in, rgb_out, 1);
|
||||
|
||||
index = 3 * (index_r + len_lut3d * (index_g + len_lut3d * index_b));
|
||||
lut3d[index ] = rgb_out[0];
|
||||
lut3d[index + 1] = rgb_out[1];
|
||||
lut3d[index + 2] = rgb_out[2];
|
||||
rgb_in[index_r].g = inverse_g[index_g];
|
||||
rgb_in[index_r].r = inverse_r[index_r];
|
||||
}
|
||||
|
||||
index_r = 0;
|
||||
i = 3 * (index_r + len_lut3d * (index_g + len_lut3d * index_b));
|
||||
cmsDoTransform(cmap_3dlut, rgb_in, &lut3d[i], len_lut3d);
|
||||
}
|
||||
}
|
||||
|
||||
free(tmp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue