From 786d1f962a7e7ee7036ea85d4ea3aec7580e25db Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 31 Jul 2009 13:17:24 +0100 Subject: [PATCH] [xlib] Use server-side gradients. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can offload creation of gradients to servers that support RENDER 0.10 and later. This greatly reduces the amount of traffic we need to send over our display connection as the gradient patterns are much smaller than the full image. Even if the server falls back to using pixman, performance should be improved by the reduced transport overhead. Furthermore this is a requisite to enable hardware accelerated gradients with the xlib backend. Running cairo-perf-trace on tiny, Celeron/i915: before: firefox-20090601 211.585 after: firefox-20090601 270.939 and on tiger, CoreDuo/nvidia: before: firefox-20090601 70.143 after: firefox-20090601 87.326 where linear gradients are used extensively throughout the GTK+ theme. Not quite the result I was expecting! 
In particular, looking at tiny: xlib-rgba paint-with-alpha_linear-rgba_over-512 47.11 (47.16 0.05%) -> 123.42 (123.72 0.13%): 2.62x slowdown █▋ xlib-rgba paint-with-alpha_linear3-rgba_over-512 47.27 (47.32 0.04%) -> 123.78 (124.04 0.13%): 2.62x slowdown █▋ xlib-rgba paint-with-alpha_linear-rgb_over-512 47.19 (47.21 0.02%) -> 123.37 (123.70 0.13%): 2.61x slowdown █▋ xlib-rgba paint-with-alpha_linear3-rgb_over-512 47.30 (47.31 0.04%) -> 123.52 (123.62 0.09%): 2.61x slowdown █▋ xlib-rgba paint_linear3-rgb_over-512 47.29 (47.32 0.05%) -> 118.95 (119.60 0.29%): 2.52x slowdown █▌ xlib-rgba paint_linear-rgba_over-512 47.14 (47.17 0.06%) -> 116.76 (117.06 0.16%): 2.48x slowdown █▌ xlib-rgba paint_linear3-rgba_over-512 47.32 (47.34 0.04%) -> 116.85 (116.98 0.05%): 2.47x slowdown █▌ xlib-rgba paint_linear-rgb_over-512 47.15 (47.19 0.03%) -> 114.08 (114.55 0.20%): 2.42x slowdown █▍ xlib-rgba paint-with-alpha_radial-rgb_over-512 117.25 (119.43 1.21%) -> 194.36 (194.73 0.09%): 1.66x slowdown ▋ xlib-rgba paint-with-alpha_radial-rgba_over-512 117.22 (117.26 0.02%) -> 193.81 (194.17 0.11%): 1.65x slowdown ▋ xlib-rgba paint_radial-rgba_over-512 117.23 (117.26 0.02%) -> 186.35 (186.41 0.03%): 1.59x slowdown ▋ xlib-rgba paint_radial-rgb_over-512 117.23 (117.27 0.02%) -> 184.14 (184.62 1.51%): 1.57x slowdown ▋ Before 1.10, we may choose to disable server-side gradients for the current crop of Xorg servers, similar to the extended repeat modes. [Updated by Chris Wilson. All bugs are his.] --- NEWS | 28 +++ src/cairo-xlib-display.c | 9 +- src/cairo-xlib-private.h | 3 +- src/cairo-xlib-surface-private.h | 5 +- src/cairo-xlib-surface.c | 306 ++++++++++++++++++++++++++++--- 5 files changed, 318 insertions(+), 33 deletions(-) diff --git a/NEWS b/NEWS index a561a3f9a..9c798dd28 100644 --- a/NEWS +++ b/NEWS @@ -26,6 +26,34 @@ New utilities: Further minimisation of the fail trace using "delta debugging". More control over test/reference targets. 
+Backend improvements: + + xlib + + Server-side gradients. The theory is that we can offload computation + of gradients to the GPU and avoid pushing large images over the + connection. Even if the driver has to fall back and use pixman to render + a temporary source, it should be able to do so in a more efficient manner + than Cairo itself. However, cairo-perf suggests otherwise: + + On tiny, Celeron/i915: + + before: firefox-20090601 211.585 + after: firefox-20090601 270.939 + + and on tiger, CoreDuo/nvidia: + + before: firefox-20090601 70.143 + after: firefox-20090601 87.326 + + In particular, looking at tiny: + + xlib-rgba paint-with-alpha_linear-rgba_over-512 47.11 (47.16 0.05%) -> 123.42 (123.72 0.13%): 2.62x slowdown + █▋ + xlib-rgba paint-with-alpha_linear3-rgba_over-512 47.27 (47.32 0.04%) -> 123.78 (124.04 0.13%): 2.62x slowdown + █▋ + + New experimental backends: QT diff --git a/src/cairo-xlib-display.c b/src/cairo-xlib-display.c index d9ee90483..880ac9303 100644 --- a/src/cairo-xlib-display.c +++ b/src/cairo-xlib-display.c @@ -284,8 +284,9 @@ _cairo_xlib_display_get (Display *dpy, memset (display->cached_xrender_formats, 0, sizeof (display->cached_xrender_formats)); - display->buggy_repeat = FALSE; + display->buggy_gradients = FALSE; display->buggy_pad_reflect = TRUE; + display->buggy_repeat = FALSE; /* This buggy_repeat condition is very complicated because there * are multiple X server code bases (with multiple versioning @@ -335,6 +336,12 @@ _cairo_xlib_display_get (Display *dpy, if (VendorRelease (dpy) >= 60700000) { if (VendorRelease (dpy) < 70000000) display->buggy_repeat = TRUE; + + /* We know that gradients simply do not work in early Xorg servers */ + if (VendorRelease (dpy) < 70200000) + { + display->buggy_gradients = TRUE; + } } else { if (VendorRelease (dpy) < 10400000) display->buggy_repeat = TRUE; diff --git a/src/cairo-xlib-private.h b/src/cairo-xlib-private.h index c79617bf5..b980b0745 100644 --- a/src/cairo-xlib-private.h +++ 
b/src/cairo-xlib-private.h @@ -72,8 +72,9 @@ struct _cairo_xlib_display { cairo_freelist_t wq_freelist; cairo_xlib_hook_t *close_display_hooks; - unsigned int buggy_repeat :1; + unsigned int buggy_gradients :1; unsigned int buggy_pad_reflect :1; + unsigned int buggy_repeat :1; unsigned int closed :1; }; diff --git a/src/cairo-xlib-surface-private.h b/src/cairo-xlib-surface-private.h index 53701e249..d963cd6a5 100644 --- a/src/cairo-xlib-surface-private.h +++ b/src/cairo-xlib-surface-private.h @@ -75,8 +75,9 @@ struct _cairo_xlib_surface { * Both are fixed in xorg >= 6.9 and hopefully in > 6.8.2, so * we can reuse the test for now. */ - cairo_bool_t buggy_repeat; - cairo_bool_t buggy_pad_reflect; + unsigned int buggy_gradients : 1; + unsigned int buggy_pad_reflect : 1; + unsigned int buggy_repeat : 1; int width; int height; diff --git a/src/cairo-xlib-surface.c b/src/cairo-xlib-surface.c index 7ffd1b8af..382cde568 100644 --- a/src/cairo-xlib-surface.c +++ b/src/cairo-xlib-surface.c @@ -153,6 +153,7 @@ static const XTransform identity = { { #define CAIRO_SURFACE_RENDER_HAS_FILTERS(surface) CAIRO_SURFACE_RENDER_AT_LEAST((surface), 0, 6) #define CAIRO_SURFACE_RENDER_HAS_EXTENDED_REPEAT(surface) CAIRO_SURFACE_RENDER_AT_LEAST((surface), 0, 10) +#define CAIRO_SURFACE_RENDER_HAS_GRADIENTS(surface) CAIRO_SURFACE_RENDER_AT_LEAST((surface), 0, 10) #define CAIRO_SURFACE_RENDER_HAS_PDF_OPERATORS(surface) CAIRO_SURFACE_RENDER_AT_LEAST((surface), 0, 11) @@ -1886,6 +1887,254 @@ _render_operator (cairo_operator_t op) } } +static cairo_int_status_t +_cairo_xlib_surface_acquire_pattern_surface (cairo_xlib_surface_t *dst, + const cairo_pattern_t *pattern, + cairo_content_t content, + int x, int y, + int width, int height, + cairo_xlib_surface_t **surface_out, + cairo_surface_attributes_t *attributes) +{ + switch (pattern->type) { + case CAIRO_PATTERN_TYPE_LINEAR: + case CAIRO_PATTERN_TYPE_RADIAL: + { + cairo_gradient_pattern_t *gradient = + (cairo_gradient_pattern_t *) pattern; + 
cairo_matrix_t matrix = pattern->matrix; + cairo_xlib_surface_t *surface; + char buf[CAIRO_STACK_BUFFER_SIZE]; + XFixed *stops; + XRenderColor *colors; + XRenderPictFormat *format; + Picture picture; + unsigned int i; + + if (dst->buggy_gradients) + break; + + if (gradient->n_stops < 2) /* becomes a solid */ + break; + + if (gradient->n_stops < sizeof (buf) / (sizeof (XFixed) + sizeof (XRenderColor))) + { + stops = (XFixed *) buf; + } + else + { + stops = + _cairo_malloc_ab (gradient->n_stops, + sizeof (XFixed) + sizeof (XRenderColor)); + if (unlikely (stops == NULL)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + } + + colors = (XRenderColor *) (stops + gradient->n_stops); + for (i = 0; i < gradient->n_stops; i++) { + stops[i] = + _cairo_fixed_16_16_from_double (gradient->stops[i].offset); + + colors[i].red = gradient->stops[i].color.red_short; + colors[i].green = gradient->stops[i].color.green_short; + colors[i].blue = gradient->stops[i].color.blue_short; + colors[i].alpha = gradient->stops[i].color.alpha_short; + } + +#if 0 + /* For some weird reason the X server is sometimes getting + * CreateGradient requests with bad length. So far I've only seen + * XRenderCreateLinearGradient request with 4 stops sometime end up + * with length field matching 0 stops at the server side. I've + * looked at the libXrender code and I can't see anything that + * could cause this behavior. However, for some reason having a + * XSync call here seems to avoid the issue so I'll keep it here + * until it's solved. 
+ */ + XSync (dst->dpy, False); +#endif + + if (pattern->type == CAIRO_PATTERN_TYPE_LINEAR) { + cairo_linear_pattern_t *linear = (cairo_linear_pattern_t *) pattern; + XLinearGradient grad; + + cairo_fixed_t xdim, ydim; + + xdim = linear->p2.x - linear->p1.x; + ydim = linear->p2.y - linear->p1.y; + + /* + * Transform the matrix to avoid overflow when converting between + * cairo_fixed_t and pixman_fixed_t (without incurring performance + * loss when the transformation is unnecessary). + * + * XXX: Consider converting out-of-range co-ordinates and transforms. + * Having a function to compute the required transformation to + * "normalize" a given bounding box would be generally useful - + * cf linear patterns, gradient patterns, surface patterns... + */ +#define PIXMAN_MAX_INT ((pixman_fixed_1 >> 1) - pixman_fixed_e) /* need to ensure deltas also fit */ + if (_cairo_fixed_integer_ceil (xdim) > PIXMAN_MAX_INT || + _cairo_fixed_integer_ceil (ydim) > PIXMAN_MAX_INT) + { + double sf; + + if (xdim > ydim) + sf = PIXMAN_MAX_INT / _cairo_fixed_to_double (xdim); + else + sf = PIXMAN_MAX_INT / _cairo_fixed_to_double (ydim); + + grad.p1.x = _cairo_fixed_16_16_from_double (_cairo_fixed_to_double (linear->p1.x) * sf); + grad.p1.y = _cairo_fixed_16_16_from_double (_cairo_fixed_to_double (linear->p1.y) * sf); + grad.p2.x = _cairo_fixed_16_16_from_double (_cairo_fixed_to_double (linear->p2.x) * sf); + grad.p2.y = _cairo_fixed_16_16_from_double (_cairo_fixed_to_double (linear->p2.y) * sf); + + cairo_matrix_scale (&matrix, sf, sf); + } + else + { + grad.p1.x = _cairo_fixed_to_16_16 (linear->p1.x); + grad.p1.y = _cairo_fixed_to_16_16 (linear->p1.y); + grad.p2.x = _cairo_fixed_to_16_16 (linear->p2.x); + grad.p2.y = _cairo_fixed_to_16_16 (linear->p2.y); + } + + picture = XRenderCreateLinearGradient (dst->dpy, &grad, + stops, colors, + gradient->n_stops); + } else { + cairo_radial_pattern_t *radial = (cairo_radial_pattern_t *) pattern; + XRadialGradient grad; + + grad.inner.x = 
_cairo_fixed_to_16_16 (radial->c1.x); + grad.inner.y = _cairo_fixed_to_16_16 (radial->c1.y); + grad.inner.radius = _cairo_fixed_to_16_16 (radial->r1); + + grad.outer.x = _cairo_fixed_to_16_16 (radial->c2.x); + grad.outer.y = _cairo_fixed_to_16_16 (radial->c2.y); + grad.outer.radius = _cairo_fixed_to_16_16 (radial->r2); + + picture = XRenderCreateRadialGradient (dst->dpy, &grad, + stops, colors, + gradient->n_stops); + + } + + if (stops != (XFixed *) buf) + free (stops); + + if (unlikely (picture == None)) + return _cairo_error (CAIRO_STATUS_NO_MEMORY); + + /* Wrap the remote Picture in an xlib surface. */ + format = _cairo_xlib_display_get_xrender_format (dst->display, + CAIRO_FORMAT_ARGB32); + + surface = (cairo_xlib_surface_t *) + _cairo_xlib_surface_create_internal (dst->dpy, None, + dst->screen, NULL, + format, 0, 0, 32); + if (unlikely (surface->base.status)) { + XRenderFreePicture (dst->dpy, picture); + return surface->base.status; + } + + surface->src_picture = picture; + + attributes->matrix = matrix; + attributes->extend = pattern->extend; + attributes->filter = CAIRO_FILTER_NEAREST; + attributes->x_offset = 0; + attributes->y_offset = 0; + + *surface_out = surface; + return CAIRO_STATUS_SUCCESS; + } + default: + ASSERT_NOT_REACHED; + case CAIRO_PATTERN_TYPE_SOLID: + case CAIRO_PATTERN_TYPE_SURFACE: + break; + } + + return _cairo_pattern_acquire_surface (pattern, &dst->base, + content, + x, y, width, height, + dst->buggy_pad_reflect ? 
+ CAIRO_PATTERN_ACQUIRE_NO_REFLECT : + CAIRO_PATTERN_ACQUIRE_NONE, + (cairo_surface_t **) surface_out, + attributes); +} + +static cairo_int_status_t +_cairo_xlib_surface_acquire_pattern_surfaces (cairo_xlib_surface_t *dst, + const cairo_pattern_t *src, + const cairo_pattern_t *mask, + cairo_content_t src_content, + int src_x, + int src_y, + int mask_x, + int mask_y, + unsigned int width, + unsigned int height, + cairo_xlib_surface_t **src_out, + cairo_xlib_surface_t **mask_out, + cairo_surface_attributes_t *src_attr, + cairo_surface_attributes_t *mask_attr) +{ + if (! dst->buggy_gradients && + (src->type == CAIRO_PATTERN_TYPE_LINEAR || + src->type == CAIRO_PATTERN_TYPE_RADIAL || + (mask && (mask->type == CAIRO_PATTERN_TYPE_LINEAR || + mask->type == CAIRO_PATTERN_TYPE_RADIAL)))) + { + cairo_int_status_t status; + + status = _cairo_xlib_surface_acquire_pattern_surface (dst, src, + src_content, + src_x, src_y, + width, height, + src_out, + src_attr); + if (unlikely (status)) + return status; + + if (mask) { + status = _cairo_xlib_surface_acquire_pattern_surface (dst, mask, + CAIRO_CONTENT_ALPHA, + mask_x, + mask_y, + width, + height, + mask_out, + mask_attr); + if (unlikely (status)) { + _cairo_pattern_release_surface (src, &(*src_out)->base, + src_attr); + return status; + } + } else { + *mask_out = NULL; + } + + return CAIRO_STATUS_SUCCESS; + } + + return _cairo_pattern_acquire_surfaces (src, mask, + &dst->base, + src_content, + src_x, src_y, + mask_x, mask_y, + width, height, + dst->buggy_pad_reflect ? 
+ CAIRO_PATTERN_ACQUIRE_NO_REFLECT : + CAIRO_PATTERN_ACQUIRE_NONE, + (cairo_surface_t **) src_out, + (cairo_surface_t **) mask_out, + src_attr, mask_attr); +} + static cairo_int_status_t _cairo_xlib_surface_composite (cairo_operator_t op, const cairo_pattern_t *src_pattern, @@ -1930,18 +2179,15 @@ _cairo_xlib_surface_composite (cairo_operator_t op, _cairo_xlib_display_notify (dst->display); - status = _cairo_pattern_acquire_surfaces (src_pattern, mask_pattern, - &dst->base, - src_content, - src_x, src_y, - mask_x, mask_y, - width, height, - dst->buggy_pad_reflect ? - CAIRO_PATTERN_ACQUIRE_NO_REFLECT : - CAIRO_PATTERN_ACQUIRE_NONE, - (cairo_surface_t **) &src, - (cairo_surface_t **) &mask, - &src_attr, &mask_attr); + status = + _cairo_xlib_surface_acquire_pattern_surfaces (dst, + src_pattern, mask_pattern, + src_content, + src_x, src_y, + mask_x, mask_y, + width, height, + &src, &mask, + &src_attr, &mask_attr); if (unlikely (status)) return status; @@ -2083,6 +2329,7 @@ _cairo_xlib_surface_composite (cairo_operator_t op, return status; } +/* XXX move this out of core and into acquire_pattern_surface() above. */ static cairo_int_status_t _cairo_xlib_surface_solid_fill_rectangles (cairo_xlib_surface_t *surface, const cairo_color_t *color, @@ -2359,14 +2606,12 @@ _cairo_xlib_surface_composite_trapezoids (cairo_operator_t op, if (operation == DO_UNSUPPORTED) return UNSUPPORTED ("unsupported operation"); - status = _cairo_pattern_acquire_surface (pattern, &dst->base, - CAIRO_CONTENT_COLOR_ALPHA, - src_x, src_y, width, height, - dst->buggy_pad_reflect ? 
- CAIRO_PATTERN_ACQUIRE_NO_REFLECT : - CAIRO_PATTERN_ACQUIRE_NONE, - (cairo_surface_t **) &src, - &attributes); + status = _cairo_xlib_surface_acquire_pattern_surface (dst, + pattern, + CAIRO_CONTENT_COLOR_ALPHA, + src_x, src_y, + width, height, + &src, &attributes); if (unlikely (status)) return status; @@ -2752,10 +2997,15 @@ _cairo_xlib_surface_create_internal (Display *dpy, /* so we can use the XTile fallback */ surface->buggy_repeat = TRUE; } + surface->buggy_pad_reflect = screen_info->display->buggy_pad_reflect; if (! CAIRO_SURFACE_RENDER_HAS_EXTENDED_REPEAT (surface)) surface->buggy_pad_reflect = TRUE; + surface->buggy_gradients = screen_info->display->buggy_gradients; + if (! CAIRO_SURFACE_RENDER_HAS_GRADIENTS (surface)) + surface->buggy_gradients = TRUE; + surface->dst_picture = None; surface->src_picture = None; @@ -4212,15 +4462,13 @@ _cairo_xlib_surface_show_glyphs (void *abstract_dst, } } - status = _cairo_pattern_acquire_surface (src_pattern, &dst->base, - CAIRO_CONTENT_COLOR_ALPHA, - glyph_extents.x, glyph_extents.y, - glyph_extents.width, glyph_extents.height, - dst->buggy_pad_reflect ? - CAIRO_PATTERN_ACQUIRE_NO_REFLECT : - CAIRO_PATTERN_ACQUIRE_NONE, - (cairo_surface_t **) &src, - &attributes); + status = _cairo_xlib_surface_acquire_pattern_surface (dst, src_pattern, + dst->base.content, + glyph_extents.x, + glyph_extents.y, + glyph_extents.width, + glyph_extents.height, + &src, &attributes); if (unlikely (status)) goto BAIL0; }