Replace wide integer divide algorithms with trivial bit-at-a-time code. Original code was of unclear provenance, this new code is completely different.

This commit is contained in:
Keith Packard 2005-07-30 12:57:54 +00:00
parent 05d84a0a1e
commit b47c0050f9
3 changed files with 60 additions and 418 deletions

View file

@@ -1,3 +1,14 @@
2005-07-30 Keith Packard <keithp@keithp.com>
* src/cairo-wideint.c: (_cairo_int32x32_64_mul),
(_cairo_uint64_divrem), (_cairo_uint128_divrem):
* src/cairo-wideint.h:
Replace wide integer divide algorithms with
trivial bit-at-a-time code. Original code was
of unclear provenance, this new code is
completely different.
2005-07-29 Carl Worth <cworth@cworth.org>
* ROADMAP: Remove completed 0.6 tasks. Add cairo_surface_flush to

View file

@@ -1,5 +1,5 @@
/*
* $Id: cairo-wideint.c,v 1.5 2005-06-03 21:51:57 cworth Exp $
* $Id: cairo-wideint.c,v 1.6 2005-07-30 19:57:54 keithp Exp $
*
* Copyright © 2004 Keith Packard
*
@@ -36,22 +36,6 @@
#include "cairoint.h"
#if !HAVE_UINT64_T || !HAVE_UINT128_T
/*
 * NOTE(review): table REMOVED by this commit (diff markers stripped from
 * this rendering).  top_bit[b] is 1 + the index of the highest set bit of
 * byte b (0 for b == 0); it is consumed by _cairo_leading_zeros32/64 below
 * via "top + top_bit[i >> top]" to locate the most significant set bit.
 */
static const unsigned char top_bit[256] =
{
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
#endif
#if HAVE_UINT64_T
#define _cairo_uint32s_to_uint64(h,l) ((uint64_t) (h) << 32 | (l))
@@ -157,7 +141,8 @@ _cairo_uint32x32_64_mul (uint32_t a, uint32_t b)
/*
 * Signed 32x32 -> 64 multiply, built on the unsigned multiply and then
 * corrected for the sign of each operand.  NOTE(review): this hunk is a
 * truncated diff with +/- markers stripped — the closing lines of the
 * function are not shown here.
 */
cairo_int64_t
_cairo_int32x32_64_mul (int32_t a, int32_t b)
{
/* NOTE(review): REMOVED line — the bug this commit fixes: `s` was never
 * declared and "(uint32_t b)" is a cast typo (should be "(uint32_t) b"). */
s = _cairo_uint32x32_64_mul ((uint32_t) a, (uint32_t b));
/* NOTE(review): ADDED replacement — declare s and cast b correctly. */
cairo_int64_t s;
s = _cairo_uint32x32_64_mul ((uint32_t) a, (uint32_t) b);
/* Sign correction: for a < 0, unsigned a contributes an extra b << 32. */
if (a < 0)
s.hi -= b;
if (b < 0)
@@ -270,200 +255,38 @@ _cairo_uint64_negate (cairo_uint64_t a)
}
/*
* The design of this algorithm comes from GCC,
* but the actual implementation is new
* Simple bit-at-a-time divide.
*/
/*
 * Count of leading zero bits in i, i.e. 32 minus the 1-based position of
 * the highest set bit (looked up byte-wise through the top_bit table).
 * NOTE(review): function REMOVED by this commit.  The `const` in the
 * return type "static const int" is meaningless on a by-value return —
 * a defect in the historical code this commit deletes.
 */
static const int
_cairo_leading_zeros32 (uint32_t i)
{
int top;
/* Select which byte of i holds the highest set bit. */
if (i < 0x100)
top = 0;
else if (i < 0x10000)
top = 8;
else if (i < 0x1000000)
top = 16;
else
top = 24;
top = top + top_bit [i >> top];
return 32 - top;
}
/* Unsigned 32-bit quotient/remainder pair returned by the normalized
 * divide helper below.  NOTE(review): REMOVED by this commit. */
typedef struct _cairo_uquorem32_t {
uint32_t quo;
uint32_t rem;
} cairo_uquorem32_t;
/*
* den >= num.hi
*/
/*
 * Divide a 64-bit numerator by a normalized 32-bit denominator (the
 * comment above requires den >= num.hi), producing a 32-bit quotient and
 * remainder.  Works in 16-bit half-digits: estimate each quotient digit
 * with a 32/16 divide, then correct the estimate downward at most twice.
 * NOTE(review): function REMOVED by this commit; `const` on the return
 * type is meaningless.
 */
static const cairo_uquorem32_t
_cairo_uint64x32_normalized_divrem (cairo_uint64_t num, uint32_t den)
{
cairo_uquorem32_t qr;
uint32_t q0, q1, r0, r1;
uint16_t d0, d1;
uint32_t t;
/* Split denominator into 16-bit halves. */
d0 = den & 0xffff;
d1 = den >> 16;
/* Estimate high quotient digit from the top 32 bits. */
q1 = num.hi / d1;
r1 = num.hi % d1;
t = q1 * d0;
r1 = (r1 << 16) | (num.lo >> 16);
/* Correct overestimate: decrement q1 at most twice. */
if (r1 < t)
{
q1--;
r1 += den;
if (r1 >= den && r1 < t)
{
q1--;
r1 += den;
}
}
r1 -= t;
/* Same scheme for the low quotient digit. */
q0 = r1 / d1;
r0 = r1 % d1;
t = q0 * d0;
r0 = (r0 << 16) | (num.lo & 0xffff);
if (r0 < t)
{
q0--;
r0 += den;
if (r0 >= den && r0 < t)
{
q0--;
r0 += den;
}
}
r0 -= t;
qr.quo = (q1 << 16) | q0;
qr.rem = r0;
return qr;
}
/*
 * 64-bit unsigned divide with remainder.
 *
 * NOTE(review): this span is a unified diff whose +/- markers were
 * stripped — it interleaves the REMOVED GCC-style multi-case algorithm
 * with the ADDED trivial bit-at-a-time algorithm (per the changelog at
 * the top of this page).  The lines are annotated below on a best-effort
 * reading of the diff; confirm against the actual commit.
 */
cairo_uquorem64_t
_cairo_uint64_divrem (cairo_uint64_t num, cairo_uint64_t den)
{
/* NOTE(review): removed — locals of the old algorithm. */
cairo_uquorem32_t qr32;
cairo_uquorem64_t qr;
int norm;
uint32_t q1, q0, r1, r0;
if (den.hi == 0)
/* NOTE(review): added — state of the new algorithm: `bit` is the
 * quotient bit being tried, `quo` accumulates the quotient. */
cairo_uint64_t bit;
cairo_uint64_t quo;
bit = _cairo_uint32_to_uint64 (1);
/* normalize to make den >= num, but not overflow */
while (_cairo_uint64_lt (den, num) && (den.hi & 0x80000000) == 0)
{
/* NOTE(review): removed — old algorithm, case den fits in 32 bits. */
if (den.lo > num.hi)
{
/* 0q = nn / 0d */
norm = _cairo_leading_zeros32 (den.lo);
if (norm)
{
den.lo <<= norm;
num = _cairo_uint64_lsl (num, norm);
}
q1 = 0;
}
else
{
/* qq = NN / 0d */
/* Deliberate divide-by-zero when den == 0 — presumably to trap,
 * mirroring hardware division-by-zero behavior.  TODO confirm. */
if (den.lo == 0)
den.lo = 1 / den.lo;
norm = _cairo_leading_zeros32 (den.lo);
if (norm)
{
cairo_uint64_t num1;
den.lo <<= norm;
num1 = _cairo_uint64_rsl (num, 32 - norm);
qr32 = _cairo_uint64x32_normalized_divrem (num1, den.lo);
q1 = qr32.quo;
num.hi = qr32.rem;
num.lo <<= norm;
}
else
{
num.hi -= den.lo;
q1 = 1;
}
}
qr32 = _cairo_uint64x32_normalized_divrem (num, den.lo);
q0 = qr32.quo;
r1 = 0;
r0 = qr32.rem >> norm;
/* NOTE(review): added — new algorithm's normalization loop body. */
bit = _cairo_uint64_lsl (bit, 1);
den = _cairo_uint64_lsl (den, 1);
}
else
quo = _cairo_uint32_to_uint64 (0);
/* generate quotient, one bit at a time */
while (bit.hi | bit.lo)
{
/* NOTE(review): removed — old algorithm, case den has high bits set. */
if (den.hi > num.hi)
/* NOTE(review): added — new algorithm's per-bit test. */
if (_cairo_uint64_le (den, num))
{
/* 00 = nn / DD */
q0 = q1 = 0;
r0 = num.lo;
r1 = num.hi;
}
else
{
/* 0q = NN / dd */
norm = _cairo_leading_zeros32 (den.hi);
if (norm == 0)
{
if (num.hi > den.hi || num.lo >= den.lo)
{
q0 = 1;
num = _cairo_uint64_sub (num, den);
}
else
{
q0 = 0;
}
q1 = 0;
r0 = num.lo;
r1 = num.hi;
}
else
{
cairo_uint64_t num1;
cairo_uint64_t part;
num1 = _cairo_uint64_rsl (num, 32 - norm);
den = _cairo_uint64_lsl (den, norm);
qr32 = _cairo_uint64x32_normalized_divrem (num1, den.hi);
part = _cairo_uint32x32_64_mul (qr32.quo, den.lo);
q0 = qr32.quo;
num.lo <<= norm;
num.hi = qr32.rem;
if (_cairo_uint64_gt (part, num))
{
q0--;
part = _cairo_uint64_sub (part, den);
}
q1 = 0;
num = _cairo_uint64_sub (num, part);
num = _cairo_uint64_rsl (num, norm);
r0 = num.lo;
r1 = num.hi;
}
/* NOTE(review): added — subtract den, record the quotient bit. */
num = _cairo_uint64_sub (num, den);
quo = _cairo_uint64_add (quo, bit);
}
bit = _cairo_uint64_rsl (bit, 1);
den = _cairo_uint64_rsl (den, 1);
}
/* NOTE(review): removed — old result assembly. */
qr.quo.lo = q0;
qr.quo.hi = q1;
qr.rem.lo = r0;
qr.rem.hi = r1;
/* NOTE(review): added — new result assembly. */
qr.quo = quo;
qr.rem = num;
return qr;
}
@@ -754,234 +577,42 @@ _cairo_uint128_eq (cairo_uint128_t a, cairo_uint128_t b)
_cairo_uint64_eq (a.lo, b.lo));
}
/*
* The design of this algorithm comes from GCC,
* but the actual implementation is new
*/
/*
* den >= num.hi
*/
/*
 * 128-bit by normalized 64-bit divide (comment above requires
 * den >= num.hi), producing a 64-bit quotient and remainder.  Same
 * estimate-and-correct scheme as the 64x32 helper, scaled up to 32-bit
 * half-digits.  NOTE(review): function REMOVED by this commit.
 */
static cairo_uquorem64_t
_cairo_uint128x64_normalized_divrem (cairo_uint128_t num, cairo_uint64_t den)
{
cairo_uquorem64_t qr64;
cairo_uquorem64_t qr;
uint32_t q0, q1;
cairo_uint64_t r0, r1;
uint32_t d0, d1;
cairo_uint64_t t;
/* Split denominator into 32-bit halves. */
d0 = uint64_lo32 (den);
d1 = uint64_hi32 (den);
/* Estimate high quotient digit. */
qr64 = _cairo_uint64_divrem (num.hi, _cairo_uint32_to_uint64 (d1));
q1 = _cairo_uint64_to_uint32 (qr64.quo);
r1 = qr64.rem;
t = _cairo_uint32x32_64_mul (q1, d0);
r1 = _cairo_uint64_add (_cairo_uint64_lsl (r1, 32),
_cairo_uint64_rsl (num.lo, 32));
/* Correct overestimate: decrement q1 at most twice. */
if (_cairo_uint64_lt (r1, t))
{
q1--;
r1 = _cairo_uint64_add (r1, den);
if (_cairo_uint64_ge (r1, den) && _cairo_uint64_lt (r1, t))
{
q1--;
r1 = _cairo_uint64_add (r1, den);
}
}
r1 = _cairo_uint64_sub (r1, t);
/* Low quotient digit, same scheme. */
qr64 = _cairo_uint64_divrem (r1, _cairo_uint32_to_uint64 (d1));
q0 = _cairo_uint64_to_uint32 (qr64.quo);
r0 = qr64.rem;
t = _cairo_uint32x32_64_mul (q0, d0);
r0 = _cairo_uint64_add (_cairo_uint64_lsl (r0, 32),
_cairo_uint32_to_uint64 (_cairo_uint64_to_uint32 (num.lo)));
if (_cairo_uint64_lt (r0, t))
{
q0--;
r0 = _cairo_uint64_add (r0, den);
if (_cairo_uint64_ge (r0, den) && _cairo_uint64_lt (r0, t))
{
q0--;
r0 = _cairo_uint64_add (r0, den);
}
}
r0 = _cairo_uint64_sub (r0, t);
qr.quo = _cairo_uint32s_to_uint64 (q1, q0);
qr.rem = r0;
return qr;
}
#if HAVE_UINT64_T
/*
 * Count of leading zero bits in a 64-bit value, native-uint64_t variant:
 * binary-search the top set bit 32/16/8 bits at a time, then finish with
 * the top_bit byte table.  NOTE(review): REMOVED by this commit.
 */
static int
_cairo_leading_zeros64 (cairo_uint64_t q)
{
int top = 0;
/* (uint64_t) 0x10000 << 16 == 2^32: test the high word first. */
if (q >= (uint64_t) 0x10000 << 16)
{
top += 32;
q >>= 32;
}
if (q >= (uint64_t) 0x10000)
{
top += 16;
q >>= 16;
}
if (q >= (uint64_t) 0x100)
{
top += 8;
q >>= 8;
}
top += top_bit [q];
return 64 - top;
}
/* True iff bit 63 (the MSB) is set. */
#define _cairo_msbset64(q) (q & ((uint64_t) 1 << 63))
#else
/* Struct-based variant for platforms without a native uint64_t.
 * NOTE(review): meaningless `const` on the return type, as above. */
static const int
_cairo_leading_zeros64 (cairo_uint64_t d)
{
if (d.hi)
return _cairo_leading_zeros32 (d.hi);
else
return 32 + _cairo_leading_zeros32 (d.lo);
}
#define _cairo_msbset64(q) (q.hi & ((uint32_t) 1 << 31))
#endif
/*
 * 128-bit unsigned divide with remainder.
 *
 * NOTE(review): like _cairo_uint64_divrem above, this span is a stripped
 * unified diff interleaving the REMOVED multi-case algorithm with the
 * ADDED bit-at-a-time algorithm.  Annotations below are a best-effort
 * reading of the diff; confirm against the actual commit.
 */
cairo_uquorem128_t
_cairo_uint128_divrem (cairo_uint128_t num, cairo_uint128_t den)
{
/* NOTE(review): removed — locals of the old algorithm. */
cairo_uquorem64_t qr64;
cairo_uquorem128_t qr;
int norm;
cairo_uint64_t q1, q0, r1, r0;
if (_cairo_uint64_eq (den.hi, _cairo_uint32_to_uint64 (0)))
/* NOTE(review): added — new algorithm's quotient bit and accumulator. */
cairo_uint128_t bit;
cairo_uint128_t quo;
bit = _cairo_uint32_to_uint128 (1);
/* normalize to make den >= num, but not overflow */
while (_cairo_uint128_lt (den, num) && !_cairo_msbset64(den.hi))
{
/* NOTE(review): removed — old algorithm, den fits in 64 bits. */
if (_cairo_uint64_gt (den.lo, num.hi))
{
/* 0q = nn / 0d */
norm = _cairo_leading_zeros64 (den.lo);
if (norm)
{
den.lo = _cairo_uint64_lsl (den.lo, norm);
num = _cairo_uint128_lsl (num, norm);
}
q1 = _cairo_uint32_to_uint64 (0);
}
else
{
/* qq = NN / 0d */
/* Deliberate 1/0 divide when den == 0 — presumably to trap;
 * TODO confirm against the original commit. */
if (_cairo_uint64_eq (den.lo, _cairo_uint32_to_uint64 (0)))
den.lo = _cairo_uint64_divrem (_cairo_uint32_to_uint64 (1),
den.lo).quo;
norm = _cairo_leading_zeros64 (den.lo);
if (norm)
{
cairo_uint128_t num1;
den.lo = _cairo_uint64_lsl (den.lo, norm);
num1 = _cairo_uint128_rsl (num, 64 - norm);
qr64 = _cairo_uint128x64_normalized_divrem (num1, den.lo);
q1 = qr64.quo;
num.hi = qr64.rem;
num.lo = _cairo_uint64_lsl (num.lo, norm);
}
else
{
num.hi = _cairo_uint64_sub (num.hi, den.lo);
q1 = _cairo_uint32_to_uint64 (1);
}
}
qr64 = _cairo_uint128x64_normalized_divrem (num, den.lo);
q0 = qr64.quo;
r1 = _cairo_uint32_to_uint64 (0);
r0 = _cairo_uint64_rsl (qr64.rem, norm);
/* NOTE(review): added — new algorithm's normalization loop body. */
bit = _cairo_uint128_lsl (bit, 1);
den = _cairo_uint128_lsl (den, 1);
}
else
quo = _cairo_uint32_to_uint128 (0);
/* generate quotient, one bit at a time */
while (_cairo_uint128_ne (bit, _cairo_uint32_to_uint128(0)))
{
/* NOTE(review): removed — old test. */
if (_cairo_uint64_gt (den.hi, num.hi))
/* NOTE(review): added — new per-bit test. */
if (_cairo_uint128_le (den, num))
{
/* 00 = nn / DD */
q0 = q1 = _cairo_uint32_to_uint64 (0);
r0 = num.lo;
r1 = num.hi;
}
else
{
/* 0q = NN / dd */
norm = _cairo_leading_zeros64 (den.hi);
if (norm == 0)
{
if (_cairo_uint64_gt (num.hi, den.hi) ||
_cairo_uint64_ge (num.lo, den.lo))
{
q0 = _cairo_uint32_to_uint64 (1);
num = _cairo_uint128_sub (num, den);
}
else
{
q0 = _cairo_uint32_to_uint64 (0);
}
q1 = _cairo_uint32_to_uint64 (0);
r0 = num.lo;
r1 = num.hi;
}
else
{
cairo_uint128_t num1;
cairo_uint128_t part;
num1 = _cairo_uint128_rsl (num, 64 - norm);
den = _cairo_uint128_lsl (den, norm);
qr64 = _cairo_uint128x64_normalized_divrem (num1, den.hi);
part = _cairo_uint64x64_128_mul (qr64.quo, den.lo);
q0 = qr64.quo;
num.lo = _cairo_uint64_lsl (num.lo, norm);
num.hi = qr64.rem;
if (_cairo_uint128_gt (part, num))
{
q0 = _cairo_uint64_sub (q0, _cairo_uint32_to_uint64 (1));
part = _cairo_uint128_sub (part, den);
}
q1 = _cairo_uint32_to_uint64 (0);
num = _cairo_uint128_sub (num, part);
num = _cairo_uint128_rsl (num, norm);
r0 = num.lo;
r1 = num.hi;
}
/* NOTE(review): added — subtract den, record the quotient bit. */
num = _cairo_uint128_sub (num, den);
quo = _cairo_uint128_add (quo, bit);
}
bit = _cairo_uint128_rsl (bit, 1);
den = _cairo_uint128_rsl (den, 1);
}
/* NOTE(review): removed — old result assembly. */
qr.quo.lo = q0;
qr.quo.hi = q1;
qr.rem.lo = r0;
qr.rem.hi = r1;
/* NOTE(review): added — new result assembly. */
qr.quo = quo;
qr.rem = num;
return qr;
}

View file

@@ -1,5 +1,5 @@
/*
* $Id: cairo-wideint.h,v 1.10 2005-05-10 19:42:32 cworth Exp $
* $Id: cairo-wideint.h,v 1.11 2005-07-30 19:57:54 keithp Exp $
*
* Copyright © 2004 Keith Packard
*
@@ -85,7 +85,7 @@ cairo_int64_t I _cairo_int32_to_int64(int32_t i);
/* Signed int64 ops forward to the unsigned implementations (two's
 * complement makes add/sub/mul bit-identical). */
#define _cairo_int64_add(a,b) _cairo_uint64_add (a,b)
#define _cairo_int64_sub(a,b) _cairo_uint64_sub (a,b)
#define _cairo_int64_mul(a,b) _cairo_uint64_mul (a,b)
/* NOTE(review): removed prototype — wrong `int` return type. */
int I _cairo_int32x32_64_mul (int32_t a, int32_t b);
/* NOTE(review): added replacement with the correct return type. */
cairo_int64_t I _cairo_int32x32_64_mul (int32_t a, int32_t b);
int I _cairo_int64_lt (cairo_uint64_t a, cairo_uint64_t b);
#define _cairo_int64_eq(a,b) _cairo_uint64_eq (a,b)
#define _cairo_int64_lsl(a,b) _cairo_uint64_lsl (a,b)
@@ -208,7 +208,7 @@ cairo_int128_t I _cairo_int64_to_int128 (cairo_int64_t i);
/* Signed int128 ops forward to the unsigned implementations. */
#define _cairo_int128_add(a,b) _cairo_uint128_add(a,b)
#define _cairo_int128_sub(a,b) _cairo_uint128_sub(a,b)
#define _cairo_int128_mul(a,b) _cairo_uint128_mul(a,b)
/* NOTE(review): removed prototype — wrong unsigned return type. */
cairo_uint128_t I _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b);
/* NOTE(review): added replacement returning the signed 128-bit type. */
cairo_int128_t I _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b);
#define _cairo_int128_lsl(a,b) _cairo_uint128_lsl(a,b)
#define _cairo_int128_rsl(a,b) _cairo_uint128_rsl(a,b)
#define _cairo_int128_rsa(a,b) _cairo_uint128_rsa(a,b)