mirror of
https://gitlab.freedesktop.org/cairo/cairo.git
synced 2026-05-02 21:18:05 +02:00
Add SRC and IN implementations to avoid CompositeGeneral in some cases hit by PDF rendering
The patch implements a few more operations with special-cased MMX code. On my laptop, applying the patch to cairo speeds up the benchmark (rendering page 14 of a PDF file [*]) from 20.9 seconds to 14.9 seconds, an improvement of 28.6%. [*] http://people.redhat.com/jakub/prelink.pdf This also benefits the recently added unaligned_clip perf case: image-rgb unaligned_clip-100 0.11 -> 0.06: 1.65x speedup; image-rgba unaligned_clip-100 0.11 -> 0.06: 1.64x speedup.
This commit is contained in:
parent
d5531c4f50
commit
cf1d95e714
3 changed files with 366 additions and 1 deletions
|
|
@ -2134,6 +2134,232 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t op,
|
|||
_mm_empty ();
|
||||
}
|
||||
|
||||
void
|
||||
fbCompositeIn_nx8x8mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height)
|
||||
{
|
||||
CARD8 *dstLine, *dst;
|
||||
CARD8 *maskLine, *mask;
|
||||
FbStride dstStride, maskStride;
|
||||
CARD16 w;
|
||||
CARD32 src;
|
||||
CARD8 sa;
|
||||
__m64 vsrc, vsrca;
|
||||
|
||||
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
|
||||
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
|
||||
|
||||
fbComposeGetSolid(pSrc, pDst, src);
|
||||
|
||||
sa = src >> 24;
|
||||
if (sa == 0)
|
||||
return;
|
||||
|
||||
vsrc = load8888(src);
|
||||
vsrca = expand_alpha(vsrc);
|
||||
|
||||
while (height--)
|
||||
{
|
||||
dst = dstLine;
|
||||
dstLine += dstStride;
|
||||
mask = maskLine;
|
||||
maskLine += maskStride;
|
||||
w = width;
|
||||
|
||||
if ((((unsigned long)pDst & 3) == 0) &&
|
||||
(((unsigned long)pSrc & 3) == 0))
|
||||
{
|
||||
while (w >= 4)
|
||||
{
|
||||
CARD32 m;
|
||||
__m64 vmask;
|
||||
__m64 vdest;
|
||||
|
||||
m = 0;
|
||||
|
||||
vmask = load8888 (*(CARD32 *)mask);
|
||||
vdest = load8888 (*(CARD32 *)dst);
|
||||
|
||||
*(CARD32 *)dst = store8888 (in (in (vsrca, vmask), vdest));
|
||||
|
||||
dst += 4;
|
||||
mask += 4;
|
||||
w -= 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (w--)
|
||||
{
|
||||
CARD16 tmp;
|
||||
CARD8 a;
|
||||
CARD32 m, d;
|
||||
CARD32 r;
|
||||
|
||||
a = *mask++;
|
||||
d = *dst;
|
||||
|
||||
m = FbInU (sa, 0, a, tmp);
|
||||
r = FbInU (m, 0, d, tmp);
|
||||
|
||||
*dst++ = r;
|
||||
}
|
||||
}
|
||||
|
||||
_mm_empty();
|
||||
}
|
||||
|
||||
void
|
||||
fbCompositeIn_8x8mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height)
|
||||
{
|
||||
CARD8 *dstLine, *dst;
|
||||
CARD8 *srcLine, *src;
|
||||
FbStride srcStride, dstStride;
|
||||
CARD16 w;
|
||||
|
||||
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
|
||||
fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1);
|
||||
|
||||
while (height--)
|
||||
{
|
||||
dst = dstLine;
|
||||
dstLine += dstStride;
|
||||
src = srcLine;
|
||||
srcLine += srcStride;
|
||||
w = width;
|
||||
|
||||
if ((((unsigned long)pDst & 3) == 0) &&
|
||||
(((unsigned long)pSrc & 3) == 0))
|
||||
{
|
||||
while (w >= 4)
|
||||
{
|
||||
CARD32 *s = (CARD32 *)src;
|
||||
CARD32 *d = (CARD32 *)dst;
|
||||
|
||||
*d = store8888 (in (load8888 (*s), load8888 (*d)));
|
||||
|
||||
w -= 4;
|
||||
dst += 4;
|
||||
src += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (w--)
|
||||
{
|
||||
CARD8 s, d;
|
||||
CARD16 tmp;
|
||||
|
||||
s = *src;
|
||||
d = *dst;
|
||||
|
||||
*dst = FbInU (s, 0, d, tmp);
|
||||
|
||||
src++;
|
||||
dst++;
|
||||
}
|
||||
}
|
||||
|
||||
_mm_empty ();
|
||||
}
|
||||
|
||||
void
|
||||
fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height)
|
||||
{
|
||||
CARD8 *dstLine, *dst;
|
||||
CARD8 *maskLine, *mask;
|
||||
FbStride dstStride, maskStride;
|
||||
CARD16 w;
|
||||
CARD32 src;
|
||||
CARD8 sa;
|
||||
__m64 vsrc, vsrca;
|
||||
|
||||
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
|
||||
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
|
||||
|
||||
fbComposeGetSolid(pSrc, pDst, src);
|
||||
|
||||
sa = src >> 24;
|
||||
if (sa == 0)
|
||||
return;
|
||||
|
||||
vsrc = load8888(src);
|
||||
vsrca = expand_alpha(vsrc);
|
||||
|
||||
while (height--)
|
||||
{
|
||||
dst = dstLine;
|
||||
dstLine += dstStride;
|
||||
mask = maskLine;
|
||||
maskLine += maskStride;
|
||||
w = width;
|
||||
|
||||
if ((((unsigned long)pMask & 3) == 0) &&
|
||||
(((unsigned long)pDst & 3) == 0))
|
||||
{
|
||||
while (w >= 4)
|
||||
{
|
||||
__m64 vmask = load8888 (*(CARD32 *)mask);
|
||||
__m64 vdest = load8888 (*(CARD32 *)dst);
|
||||
|
||||
*(CARD32 *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
|
||||
|
||||
w -= 4;
|
||||
dst += 4;
|
||||
mask += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while (w--)
|
||||
{
|
||||
CARD16 tmp;
|
||||
CARD16 a;
|
||||
CARD32 m, d;
|
||||
CARD32 r;
|
||||
|
||||
a = *mask++;
|
||||
d = *dst;
|
||||
|
||||
m = FbInU (sa, 0, a, tmp);
|
||||
r = FbAdd (m, d, 0, tmp);
|
||||
|
||||
*dst++ = r;
|
||||
}
|
||||
}
|
||||
|
||||
_mm_empty();
|
||||
}
|
||||
|
||||
void
|
||||
fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
|
|
|
|||
|
|
@ -44,6 +44,20 @@ Bool fbHaveMMX(void);
|
|||
pixman_private
|
||||
void fbComposeSetupMMX(void);
|
||||
|
||||
pixman_private
|
||||
void fbCompositeIn_nx8x8mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height);
|
||||
|
||||
pixman_private
|
||||
void fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
|
|
@ -109,6 +123,35 @@ void fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_operator_t op,
|
|||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height);
|
||||
|
||||
pixman_private
|
||||
void fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height);
|
||||
|
||||
pixman_private
|
||||
void fbCompositeIn_8x8mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height);
|
||||
|
||||
pixman_private
|
||||
void fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
|
|
|
|||
|
|
@ -843,6 +843,58 @@ fbCompositeSrcAdd_8888x8888 (pixman_operator_t op,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fbCompositeSrcAdd_8888x8x8 (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
PicturePtr pMask,
|
||||
PicturePtr pDst,
|
||||
INT16 xSrc,
|
||||
INT16 ySrc,
|
||||
INT16 xMask,
|
||||
INT16 yMask,
|
||||
INT16 xDst,
|
||||
INT16 yDst,
|
||||
CARD16 width,
|
||||
CARD16 height)
|
||||
{
|
||||
CARD8 *dstLine, *dst;
|
||||
CARD8 *maskLine, *mask;
|
||||
FbStride dstStride, maskStride;
|
||||
CARD16 w;
|
||||
CARD32 src;
|
||||
CARD8 sa;
|
||||
|
||||
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
|
||||
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
|
||||
fbComposeGetSolid (pSrc, pDst, src);
|
||||
sa = (src >> 24);
|
||||
|
||||
while (height--)
|
||||
{
|
||||
dst = dstLine;
|
||||
dstLine += dstStride;
|
||||
mask = maskLine;
|
||||
maskLine += maskStride;
|
||||
w = width;
|
||||
|
||||
while (w--)
|
||||
{
|
||||
CARD16 tmp;
|
||||
CARD16 a;
|
||||
CARD32 m, d;
|
||||
CARD32 r;
|
||||
|
||||
a = *mask++;
|
||||
d = *dst;
|
||||
|
||||
m = FbInU (sa, 0, a, tmp);
|
||||
r = FbAdd (m, d, 0, tmp);
|
||||
|
||||
*dst++ = r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fbCompositeSrcAdd_1000x1000 (pixman_operator_t op,
|
||||
PicturePtr pSrc,
|
||||
|
|
@ -1759,6 +1811,26 @@ pixman_composite (pixman_operator_t op,
|
|||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((pSrc->format_code == PICT_a8r8g8b8 ||
|
||||
pSrc->format_code == PICT_a8b8g8r8) &&
|
||||
srcRepeat &&
|
||||
pMask->format_code == PICT_a8 &&
|
||||
pDst->format_code == PICT_a8)
|
||||
{
|
||||
#ifdef USE_MMX
|
||||
if (fbHaveMMX())
|
||||
{
|
||||
srcRepeat = FALSE;
|
||||
|
||||
func = fbCompositeSrcAdd_8888x8x8mmx;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
func = fbCompositeSrcAdd_8888x8x8;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PIXMAN_OPERATOR_SRC:
|
||||
if (pMask)
|
||||
|
|
@ -1798,10 +1870,34 @@ pixman_composite (pixman_operator_t op,
|
|||
}
|
||||
}
|
||||
break;
|
||||
case PIXMAN_OPERATOR_IN:
|
||||
#ifdef USE_MMX
|
||||
if (pSrc->format_code == PICT_a8 &&
|
||||
pDst->format_code == PICT_a8 &&
|
||||
!pMask)
|
||||
{
|
||||
if (fbHaveMMX())
|
||||
func = fbCompositeIn_8x8mmx;
|
||||
}
|
||||
else if (srcRepeat && pMask && !pMask->componentAlpha &&
|
||||
(pSrc->format_code == PICT_a8r8g8b8 ||
|
||||
pSrc->format_code == PICT_a8b8g8r8) &&
|
||||
(pMask->format_code == PICT_a8) &&
|
||||
pDst->format_code == PICT_a8)
|
||||
{
|
||||
if (fbHaveMMX())
|
||||
{
|
||||
srcRepeat = FALSE;
|
||||
func = fbCompositeIn_nx8x8mmx;
|
||||
}
|
||||
}
|
||||
#else
|
||||
func = NULL;
|
||||
#endif
|
||||
break;
|
||||
case PIXMAN_OPERATOR_CLEAR:
|
||||
case PIXMAN_OPERATOR_DST:
|
||||
case PIXMAN_OPERATOR_OVER_REVERSE:
|
||||
case PIXMAN_OPERATOR_IN:
|
||||
case PIXMAN_OPERATOR_IN_REVERSE:
|
||||
case PIXMAN_OPERATOR_OUT:
|
||||
case PIXMAN_OPERATOR_OUT_REVERSE:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue