Replace some fixed-point maths with floating point maths. Much faster.

2026-05-07 08:48:02 +02:00 · 2024-11-04 15:07:58 +13:00 · 2024-11-04 15:07:58 +13:00 · 64ac62b397
commit 64ac62b397
parent 0ae7e60737
2 changed files with 17 additions and 114 deletions
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@ -37,6 +37,7 @@
 #include <freetype/internal/ftcalc.h>
 #include <freetype/internal/ftdebug.h>
 #include <freetype/internal/ftobjs.h>
+#include <math.h>


 #ifdef FT_MULFIX_ASSEMBLER
@ -799,91 +800,15 @@
  FT_BASE_DEF( FT_UInt32 )
  FT_Vector_NormLen( FT_Vector*  vector )
  {
-    FT_Int32   x_ = vector->x;
-    FT_Int32   y_ = vector->y;
-    FT_Int32   b, z;
-    FT_UInt32  x, y, u, v, l;
-    FT_Int     sx = 1, sy = 1, shift;
-
-
-    FT_MOVE_SIGN( FT_UInt32, x_, x, sx );
-    FT_MOVE_SIGN( FT_UInt32, y_, y, sy );
-
-    /* trivial cases */
-    if ( x == 0 )
-    {
-      if ( y > 0 )
-        vector->y = sy * 0x10000;
-      return y;
-    }
-    else if ( y == 0 )
-    {
-      if ( x > 0 )
-        vector->x = sx * 0x10000;
-      return x;
-    }
-
-    /* Estimate length and prenormalize by shifting so that */
-    /* the new approximate length is between 2/3 and 4/3.   */
-    /* The magic constant 0xAAAAAAAAUL (2/3 of 2^32) helps  */
-    /* achieve this in 16.16 fixed-point representation.    */
-    l = x > y ? x + ( y >> 1 )
-              : y + ( x >> 1 );
-
-    shift  = 31 - FT_MSB( l );
-    shift -= 15 + ( l >= ( 0xAAAAAAAAUL >> shift ) );
-
-    if ( shift > 0 )
-    {
-      x <<= shift;
-      y <<= shift;
-
-      /* re-estimate length for tiny vectors */
-      l = x > y ? x + ( y >> 1 )
-                : y + ( x >> 1 );
-    }
-    else
-    {
-      x >>= -shift;
-      y >>= -shift;
-      l >>= -shift;
-    }
-
-    /* lower linear approximation for reciprocal length minus one */
-    b = 0x10000 - (FT_Int32)l;
-
-    x_ = (FT_Int32)x;
-    y_ = (FT_Int32)y;
-
-    /* Newton's iterations */
-    do
-    {
-      u = (FT_UInt32)( x_ + ( x_ * b >> 16 ) );
-      v = (FT_UInt32)( y_ + ( y_ * b >> 16 ) );
-
-      /* Normalized squared length in the parentheses approaches 2^32. */
-      /* On two's complement systems, converting to signed gives the   */
-      /* difference with 2^32 even if the expression wraps around.     */
-      z = -(FT_Int32)( u * u + v * v ) / 0x200;
-      z = z * ( ( 0x10000 + b ) >> 8 ) / 0x10000;
-
-      b += z;
-
-    } while ( z > 0 );
-
-    vector->x = sx < 0 ? -(FT_Pos)u : (FT_Pos)u;
-    vector->y = sy < 0 ? -(FT_Pos)v : (FT_Pos)v;
-
-    /* Conversion to signed helps to recover from likely wrap around */
-    /* in calculating the prenormalized length, because it gives the */
-    /* correct difference with 2^32 on two's complement systems.     */
-    l = (FT_UInt32)( 0x10000 + (FT_Int32)( u * x + v * y ) / 0x10000 );
-    if ( shift > 0 )
-      l = ( l + ( 1 << ( shift - 1 ) ) ) >> shift;
-    else
-      l <<= -shift;
-
-    return l;
+    /* Scale `vector' to 16.16 unit length and return its former length. */
+    /* `double', as the 24-bit mantissa of `float' drops low input bits. */
+    const double  vx   = (double)vector->x;
+    const double  vy   = (double)vector->y;
+    const double  vlen = sqrt( vx * vx + vy * vy );
+    if ( vlen == 0.0 ) return 0;  /* zero vector: no 0/0, leave untouched */
+    vector->x = (FT_Pos)lround( vx * 65536.0 / vlen );
+    vector->y = (FT_Pos)lround( vy * 65536.0 / vlen );
+    return (FT_UInt32)( vlen + 0.5 );  /* round instead of truncating */
  }


--- a/src/base/fttrigon.c
+++ b/src/base/fttrigon.c
@ -32,6 +32,7 @@
 #include <freetype/internal/ftobjs.h>
 #include <freetype/internal/ftcalc.h>
 #include <freetype/fttrigon.h>
+#include <math.h>


  /* the Cordic shrink factor 0.858785336480436 * 2^32 */
@ -416,35 +417,12 @@
  FT_EXPORT_DEF( FT_Fixed )
  FT_Vector_Length( FT_Vector*  vec )
  {
-    FT_Int     shift;
-    FT_Vector  v;
-
-
-    if ( !vec )
-      return 0;
-
-    v = *vec;
-
-    /* handle trivial cases */
-    if ( v.x == 0 )
-    {
-      return FT_ABS( v.y );
-    }
-    else if ( v.y == 0 )
-    {
-      return FT_ABS( v.x );
-    }
-
-    /* general case */
-    shift = ft_trig_prenorm( &v );
-    ft_trig_pseudo_polarize( &v );
-
-    v.x = ft_trig_downscale( v.x );
-
-    if ( shift > 0 )
-      return ( v.x + ( 1L << ( shift - 1 ) ) ) >> shift;
-
-    return (FT_Fixed)( (FT_UInt32)v.x << -shift );
+    /* Euclidean length of `vec', rounded to nearest; a 16.16 input gives */
+    /* a 16.16 result.  Uses `double' since `float' has 24 mantissa bits. */
+    const double  x = vec ? (double)vec->x : 0.0;  /* NULL is treated as */
+    const double  y = vec ? (double)vec->y : 0.0;  /* the zero vector    */
+
+    return (FT_Fixed)(FT_UInt32)( sqrt( x * x + y * y ) + 0.5 );
  }