Optimize gradient computations

We update the test suite reference images where needed, (pdiff
avoided a few, but most still needed updating). We take advantage
of the need for new reference images to shrink some of the giant
tests to speed them up a bit.

This optimization provides a 2x improvement in linear gradient
generation performance (numbers from an x86 laptop):

image-rgb    paint_linear_rgba_source-512 26.13 -> 11.13: 2.35x speedup
█▍
image-rgb    paint_linear_rgba_source-256  6.47 ->  2.76: 2.34x speedup
█▍
image-rgba      paint_linear_rgb_over-256  6.51 ->  2.86: 2.28x speedup
█▎
image-rgb      paint_linear_rgba_over-512 28.62 -> 13.70: 2.09x speedup
█▏
image-rgba       fill_linear_rgb_over-256  3.24 ->  1.94: 1.66x speedup
▋
image-rgb     stroke_linear_rgba_over-256  5.68 ->  4.10: 1.39x speedup
▍
This commit is contained in:
David Turner 2006-12-14 06:55:58 -08:00 committed by Carl Worth
parent af9cce6aa2
commit e9bef30d2b
24 changed files with 319 additions and 148 deletions

View file

@ -2731,78 +2731,231 @@ static void fbFetch(PicturePtr pict, int x, int y, int width, CARD32 *buffer, CA
#define DIV(a,b) ((((a) < 0) == ((b) < 0)) ? (a) / (b) :\
((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
static CARD32
xRenderColorMultToCard32 (pixman_color_t *c)
typedef struct
{
return
((((uint32_t) c->red * c->alpha) >> 24) << 16) |
((((uint32_t) c->green * c->alpha) >> 24) << 8) |
((((uint32_t) c->blue * c->alpha) >> 24) << 0) |
((((uint32_t) c->alpha ) >> 8) << 24);
CARD32 left_ag;
CARD32 left_rb;
CARD32 right_ag;
CARD32 right_rb;
int32_t left_x;
int32_t right_x;
int32_t width_x;
int32_t stepper;
pixman_gradient_stop_t *stops;
int num_stops;
unsigned int spread;
} GradientWalker;
static void
_gradient_walker_init (GradientWalker *walker,
SourcePictPtr pGradient,
unsigned int spread)
{
walker->num_stops = pGradient->gradient.nstops;
walker->stops = pGradient->gradient.stops;
walker->left_x = 0;
walker->right_x = 0x10000;
walker->width_x = 0; /* will force a reset */
walker->stepper = 0;
walker->left_ag = 0;
walker->left_rb = 0;
walker->right_ag = 0;
walker->right_rb = 0;
walker->spread = spread;
}
static CARD32 gradientPixel(const SourcePictPtr pGradient, xFixed_48_16 pos, unsigned int spread)
static void
_gradient_walker_reset (GradientWalker *walker,
xFixed_32_32 pos)
{
int ipos = (pos * pGradient->gradient.stopRange - 1) >> 16;
int32_t x, left_x, right_x;
pixman_color_t *left_c, *right_c;
int n, count = walker->num_stops;
pixman_gradient_stop_t *stops = walker->stops;
/* calculate the actual offset. */
if (ipos < 0 || ipos >= pGradient->gradient.stopRange)
static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
switch (walker->spread)
{
if (pGradient->type == SourcePictTypeConical || spread == RepeatNormal)
{
ipos = ipos % pGradient->gradient.stopRange;
ipos = ipos < 0 ? pGradient->gradient.stopRange + ipos : ipos;
case RepeatNormal:
x = (int32_t)pos & 0xFFFF;
for (n = 0; n < count; n++)
if (x < stops[n].x)
break;
if (n == 0) {
left_x = stops[count-1].x - 0x10000;
left_c = &stops[count-1].color;
} else {
left_x = stops[n-1].x;
left_c = &stops[n-1].color;
}
else if (spread == RepeatReflect)
{
const int limit = pGradient->gradient.stopRange * 2 - 1;
ipos = ipos % limit;
ipos = ipos < 0 ? limit + ipos : ipos;
ipos = ipos >= pGradient->gradient.stopRange ? limit - ipos : ipos;
if (n == count) {
right_x = stops[0].x + 0x10000;
right_c = &stops[0].color;
} else {
right_x = stops[n].x;
right_c = &stops[n].color;
}
left_x += (pos - x);
right_x += (pos - x);
break;
case RepeatPad:
for (n = 0; n < count; n++)
if (pos < stops[n].x)
break;
if (n == 0) {
left_x = INT_MIN;
left_c = &stops[0].color;
} else {
left_x = stops[n-1].x;
left_c = &stops[n-1].color;
}
else if (spread == RepeatPad)
{
if (ipos < 0)
ipos = 0;
else
ipos = pGradient->gradient.stopRange - 1;
if (n == count) {
right_x = INT_MAX;
right_c = &stops[n-1].color;
} else {
right_x = stops[n].x;
right_c = &stops[n].color;
}
else /* RepeatNone */
break;
case RepeatReflect:
x = (int32_t)pos & 0xFFFF;
if ((int32_t)pos & 0x10000)
x = 0x10000 - x;
for (n = 0; n < count; n++)
if (x < stops[n].x)
break;
if (n == 0) {
left_x = -stops[0].x;
left_c = &stops[0].color;
} else {
left_x = stops[n-1].x;
left_c = &stops[n-1].color;
}
if (n == count) {
right_x = 0x20000 - stops[n-1].x;
right_c = &stops[n-1].color;
} else {
right_x = stops[n].x;
right_c = &stops[n].color;
}
if ((int32_t)pos & 0x10000) {
pixman_color_t *tmp_c;
int32_t tmp_x;
tmp_x = 0x20000 - right_x;
right_x = 0x20000 - left_x;
left_x = tmp_x;
tmp_c = right_c;
right_c = left_c;
left_c = tmp_c;
}
left_x += (pos - x);
right_x += (pos - x);
break;
default: /* RepeatNone */
for (n = 0; n < count; n++)
if (pos < stops[n].x)
break;
if (n == 0)
{
return 0;
left_x = INT_MIN;
right_x = stops[0].x;
left_c = right_c = (pixman_color_t*) &transparent_black;
}
else if (n == count)
{
left_x = stops[n-1].x;
right_x = INT_MAX;
left_c = right_c = (pixman_color_t*) &transparent_black;
}
else
{
left_x = stops[n-1].x;
right_x = stops[n].x;
left_c = &stops[n-1].color;
right_c = &stops[n].color;
}
}
if (pGradient->gradient.colorTableSize)
walker->left_x = left_x;
walker->right_x = right_x;
walker->width_x = right_x - left_x;
walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8);
walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8);
walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8);
walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
if ( walker->width_x == 0 ||
( walker->left_ag == walker->right_ag &&
walker->left_rb == walker->right_rb ) )
{
return pGradient->gradient.colorTable[ipos];
walker->width_x = 1;
walker->stepper = 0;
}
else
{
int i;
if (ipos <= pGradient->gradient.stops->x)
return xRenderColorMultToCard32 (&pGradient->gradient.stops->color);
for (i = 1; i < pGradient->gradient.nstops; i++)
{
if (pGradient->gradient.stops[i].x >= ipos)
return PictureGradientColor (&pGradient->gradient.stops[i - 1],
&pGradient->gradient.stops[i],
ipos);
}
return xRenderColorMultToCard32 (&pGradient->gradient.stops[--i].color);
walker->stepper = ((1 << 24) + walker->width_x/2)/walker->width_x;
}
}
#define GRADIENT_WALKER_NEED_RESET(w,x) \
( (x) < (w)->left_x || (x) - (w)->left_x >= (w)->width_x )
/* the following assumes that GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
static CARD32
_gradient_walker_pixel (GradientWalker *walker,
xFixed_32_32 x)
{
int dist, idist;
uint32_t t1, t2, a, color;
if (GRADIENT_WALKER_NEED_RESET (walker, x))
_gradient_walker_reset (walker, x);
dist = ((int)(x - walker->left_x)*walker->stepper) >> 16;
idist = 256 - dist;
/* combined INTERPOLATE and premultiply */
t1 = walker->left_rb*idist + walker->right_rb*dist;
t1 = (t1 >> 8) & 0xff00ff;
t2 = walker->left_ag*idist + walker->right_ag*dist;
t2 &= 0xff00ff00;
color = t2 & 0xff000000;
a = t2 >> 24;
t1 = t1*a + 0x800080;
t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
t2 = (t2 >> 8)*a + 0x800080;
t2 = (t2 + ((t2 >> 8) & 0xff00ff));
return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
}
static void fbFetchSourcePict(PicturePtr pict, int x, int y, int width, CARD32 *buffer, CARD32 *mask, CARD32 maskBits)
{
SourcePictPtr pGradient = pict->pSourcePict;
CARD32 *end = buffer + width;
SourcePictPtr pGradient = pict->pSourcePict;
GradientWalker walker;
CARD32 *end = buffer + width;
_gradient_walker_init (&walker, pGradient, pict->repeat);
if (pGradient->type == SourcePictTypeSolidFill) {
register CARD32 color = pGradient->solidFill.color;
@ -2853,20 +3006,29 @@ static void fbFetchSourcePict(PicturePtr pict, int x, int y, int width, CARD32 *
{
register CARD32 color;
color = gradientPixel (pGradient, t, pict->repeat);
color = _gradient_walker_pixel( &walker, t );
while (buffer < end)
*buffer++ = color;
}
else
{
while (buffer < end) {
if (!mask || *mask++ & maskBits)
{
*buffer = gradientPixel (pGradient, t, pict->repeat);
}
++buffer;
t += inc;
}
if (!mask) {
while (buffer < end)
{
*buffer = _gradient_walker_pixel (&walker, t);
buffer += 1;
t += inc;
}
} else {
while (buffer < end) {
if (*mask++ & maskBits)
{
*buffer = _gradient_walker_pixel (&walker, t);
}
buffer += 1;
t += inc;
}
}
}
}
else /* projective transformation */
@ -2890,31 +3052,31 @@ static void fbFetchSourcePict(PicturePtr pict, int x, int y, int width, CARD32 *
t = ((a * x + b * y) >> 16) + off;
}
color = gradientPixel (pGradient, t, pict->repeat);
color = _gradient_walker_pixel( &walker, t );
while (buffer < end)
*buffer++ = color;
}
else
{
while (buffer < end)
{
if (!mask || *mask++ & maskBits)
{
if (v.vector[2] == 0) {
t = 0;
} else {
xFixed_48_16 x, y;
x = ((xFixed_48_16)v.vector[0] << 16) / v.vector[2];
y = ((xFixed_48_16)v.vector[1] << 16) / v.vector[2];
t = ((a*x + b*y) >> 16) + off;
}
*buffer = gradientPixel(pGradient, t, pict->repeat);
}
++buffer;
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
v.vector[2] += unit.vector[2];
}
while (buffer < end)
{
if (!mask || *mask++ & maskBits)
{
if (v.vector[2] == 0) {
t = 0;
} else {
xFixed_48_16 x, y;
x = ((xFixed_48_16)v.vector[0] << 16) / v.vector[2];
y = ((xFixed_48_16)v.vector[1] << 16) / v.vector[2];
t = ((a*x + b*y) >> 16) + off;
}
*buffer = _gradient_walker_pixel (&walker, t);
}
++buffer;
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
v.vector[2] += unit.vector[2];
}
}
}
} else {
@ -2951,19 +3113,22 @@ static void fbFetchSourcePict(PicturePtr pict, int x, int y, int width, CARD32 *
ry -= pGradient->radial.fy;
while (buffer < end) {
double b, c, det, s;
double b, c, det, s;
if (!mask || *mask++ & maskBits)
{
b = 2*(rx*pGradient->radial.dx + ry*pGradient->radial.dy);
c = -(rx*rx + ry*ry);
det = (b * b) - (4 * pGradient->radial.a * c);
s = (-b + sqrt(det))/(2. * pGradient->radial.a);
*buffer = gradientPixel(pGradient,
(xFixed_48_16)((s*pGradient->radial.m + pGradient->radial.b)*65536),
pict->repeat);
}
++buffer;
if (!mask || *mask++ & maskBits)
{
xFixed_48_16 t;
b = 2*(rx*pGradient->radial.dx + ry*pGradient->radial.dy);
c = -(rx*rx + ry*ry);
det = (b * b) - (4 * pGradient->radial.a * c);
s = (-b + sqrt(det))/(2. * pGradient->radial.a);
t = (xFixed_48_16)((s*pGradient->radial.m + pGradient->radial.b)*65536);
*buffer = _gradient_walker_pixel (&walker, t);
}
++buffer;
rx += cx;
ry += cy;
}
@ -2972,25 +3137,27 @@ static void fbFetchSourcePict(PicturePtr pict, int x, int y, int width, CARD32 *
double x, y;
double b, c, det, s;
if (!mask || *mask++ & maskBits)
{
if (rz != 0) {
x = rx/rz;
y = ry/rz;
} else {
x = y = 0.;
}
x -= pGradient->radial.fx;
y -= pGradient->radial.fy;
b = 2*(x*pGradient->radial.dx + y*pGradient->radial.dy);
c = -(x*x + y*y);
det = (b * b) - (4 * pGradient->radial.a * c);
s = (-b + sqrt(det))/(2. * pGradient->radial.a);
*buffer = gradientPixel(pGradient,
(xFixed_48_16)((s*pGradient->radial.m + pGradient->radial.b)*65536),
pict->repeat);
}
++buffer;
if (!mask || *mask++ & maskBits)
{
xFixed_48_16 t;
if (rz != 0) {
x = rx/rz;
y = ry/rz;
} else {
x = y = 0.;
}
x -= pGradient->radial.fx;
y -= pGradient->radial.fy;
b = 2*(x*pGradient->radial.dx + y*pGradient->radial.dy);
c = -(x*x + y*y);
det = (b * b) - (4 * pGradient->radial.a * c);
s = (-b + sqrt(det))/(2. * pGradient->radial.a);
t = (xFixed_48_16)((s*pGradient->radial.m + pGradient->radial.b)*65536);
*buffer = _gradient_walker_pixel (&walker, t);
}
++buffer;
rx += cx;
ry += cy;
rz += cz;
@ -3005,37 +3172,41 @@ static void fbFetchSourcePict(PicturePtr pict, int x, int y, int width, CARD32 *
while (buffer < end) {
double angle;
if (!mask || *mask++ & maskBits)
{
angle = atan2(ry, rx) + a;
if (!mask || *mask++ & maskBits)
{
xFixed_48_16 t;
*buffer = gradientPixel(pGradient, (xFixed_48_16) (angle * (65536. / (2*M_PI))),
pict->repeat);
}
angle = atan2(ry, rx) + a;
t = (xFixed_48_16) (angle * (65536. / (2*M_PI)));
*buffer = _gradient_walker_pixel (&walker, t);
}
++buffer;
rx += cx;
ry += cy;
}
} else {
while (buffer < end) {
double x, y;
double angle;
double angle;
if (!mask || *mask++ & maskBits)
{
if (rz != 0) {
x = rx/rz;
y = ry/rz;
} else {
x = y = 0.;
}
x -= pGradient->conical.center.x/65536.;
y -= pGradient->conical.center.y/65536.;
angle = atan2(y, x) + a;
*buffer = gradientPixel(pGradient, (xFixed_48_16) (angle * (65536. / (2*M_PI))),
pict->repeat);
}
if (!mask || *mask++ & maskBits)
{
xFixed_48_16 t;
if (rz != 0) {
x = rx/rz;
y = ry/rz;
} else {
x = y = 0.;
}
x -= pGradient->conical.center.x/65536.;
y -= pGradient->conical.center.y/65536.;
angle = atan2(y, x) + a;
t = (xFixed_48_16) (angle * (65536. / (2*M_PI)));
*buffer = _gradient_walker_pixel (&walker, t);
}
++buffer;
rx += cx;
ry += cy;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

After

Width:  |  Height:  |  Size: 8.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 3.2 KiB

View file

@ -27,9 +27,9 @@
#include "cairo-test.h"
#include <stdio.h>
#define WIDTH 64
#define HEIGHT 64
#define PAD 10
#define WIDTH 16
#define HEIGHT 16
#define PAD 2
const char png_filename[] = "romedalen.png";

Binary file not shown.

Before

Width:  |  Height:  |  Size: 86 KiB

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 69 KiB

After

Width:  |  Height:  |  Size: 6.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 86 KiB

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 80 KiB

View file

@ -28,9 +28,9 @@
#include "cairo-test.h"
#include <stdio.h>
#define WIDTH 64
#define HEIGHT 64
#define PAD 10
#define WIDTH 16
#define HEIGHT 16
#define PAD 2
const char png_filename[] = "romedalen.png";

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.8 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.1 KiB

After

Width:  |  Height:  |  Size: 965 B

View file

@ -28,9 +28,9 @@
#include "cairo-test.h"
#include <stdio.h>
#define WIDTH 64
#define HEIGHT 64
#define PAD 10
#define WIDTH 16
#define HEIGHT 16
#define PAD 2
const char png_filename[] = "romedalen.png";

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 4.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 3.1 KiB

View file

@ -28,9 +28,9 @@
#include "cairo-test.h"
#include <stdio.h>
#define WIDTH 64
#define HEIGHT 64
#define PAD 10
#define WIDTH 16
#define HEIGHT 16
#define PAD 2
const char png_filename[] = "romedalen.png";

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

View file

@ -27,9 +27,9 @@
#include "cairo-test.h"
#include <stdio.h>
#define WIDTH 64
#define HEIGHT 64
#define PAD 10
#define WIDTH 16
#define HEIGHT 16
#define PAD 2
const char png_filename[] = "romedalen.png";

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

View file

@ -28,9 +28,9 @@
#include "cairo-test.h"
#include <stdio.h>
#define WIDTH 64
#define HEIGHT 64
#define PAD 10
#define WIDTH 16
#define HEIGHT 16
#define PAD 2
static void
draw_mask (cairo_t *cr, int x, int y)