freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[freetype2] gsoc-anurag-2022-final cd2e6217f: [dense] Migrate line drawing and accumulation to fixed-point


From: Werner Lemberg
Subject: [freetype2] gsoc-anurag-2022-final cd2e6217f: [dense] Migrate line drawing and accumulation to fixed-point
Date: Sat, 19 Nov 2022 02:34:39 -0500 (EST)

branch: gsoc-anurag-2022-final
commit cd2e6217f31519315457d3eb17e1568ccf5d2529
Author: Anurag Thakur <anurag105csec21@bpitindia.edu.in>
Commit: Anurag Thakur <anurag105csec21@bpitindia.edu.in>

    [dense] Migrate line drawing and accumulation to fixed-point
    
    * src/dense/ftdense.h: (FT26D6, FT20D12): New typedefs
    
    * src/dense/ftdense.c: dense_render_line, dense_render_glyph now
    use fixed-point numbers for calculation
    
    Disabled SIMD for now
---
 src/dense/ftdense.c | 181 ++++++++++++++++++++++++++++------------------------
 src/dense/ftdense.h |   4 ++
 2 files changed, 103 insertions(+), 82 deletions(-)

diff --git a/src/dense/ftdense.c b/src/dense/ftdense.c
index a6b69f53e..c39d43e2a 100644
--- a/src/dense/ftdense.c
+++ b/src/dense/ftdense.c
@@ -81,57 +81,65 @@ dense_line_to( const FT_Vector* to, dense_worker* worker )
 void
 dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
 {
-  float from_x = worker->prev_x;
-  float from_y = worker->prev_y;
-  if ( from_y == toy )
-    return;
 
+  FT26D6 fx = worker->prev_x>>2;
+  FT26D6 fy = worker->prev_y>>2;
+
+  FT26D6 from_x = fx;
+  FT26D6 from_y = fy;
+
+
+  FT26D6 tx = tox>>2;
+  FT26D6 ty = toy>>2;
 
-  from_x /= 256.0;
-  from_y /= 256.0;
-  float to_x = tox / 256.0;
-  float to_y = toy / 256.0;
+  if ( fy == ty )
+    return;
 
+  FT26D6 to_x = tx;
+  FT26D6 to_y = ty;
 
-  float dir;
-  if ( from_y < to_y )
-    dir = 1;
-  else
+  int dir = 1;
+  if ( from_y >= to_y )
   {
     dir = -1;
-    FT_SWAP(from_x, to_x );
-    FT_SWAP(from_y, to_y );
+    FT_SWAP(from_x, to_x);
+    FT_SWAP(from_y, to_y);
   }
 
   // Clip to the height.
-  if ( from_y >= worker->m_h || to_y <= 0 )
+  if ( from_y >= worker->m_h<<6 || to_y <= 0 )
     return;
 
-  float dxdy = ( to_x - from_x ) / (float)( to_y - from_y );
+  FT26D6 deltax,deltay;
+  deltax = to_x - from_x;
+  deltay = to_y - from_y;
+
   if ( from_y < 0 )
   {
-    from_x -= from_y * dxdy;
+    from_x -= from_y * deltax/deltay;
     from_y = 0;
   }
-  if ( to_y > worker->m_h )
+
+  if ( to_y > worker->m_h<<6 )
   {
-    to_x -= ( to_y - worker->m_h ) * dxdy;
-    to_y = (float)worker->m_h;
+    to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay);
+    to_y = worker->m_h<<6;
   }
 
-  float  x       = from_x;
-  int    y0      = (int)from_y;
-  int    y_limit = (int)ceil( to_y );
-  float* m_a     = worker->m_a;
+  int    x       = from_x;
+  int    y0      = from_y>>6;
+  int    y_limit = (to_y + 0x3f)>>6;
+
+  FT20D12* m_a     = worker->m_a;
 
   for ( int y = y0; y < y_limit; y++ )
   {
     int   linestart = y * worker->m_w;
-    float dy        = fmin( y + 1.0f, to_y ) - fmax( (float)y, from_y );
-    float xnext     = x + dxdy * dy;
-    float d         = dy * dir;
+    FT26D6 dy        = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
+    FT26D6 xnext     = x + dy * deltax/deltay;
+    FT26D6 d         = dy * dir;
 
-    float x0, x1;
+    FT26D6 x0, x1;
     if ( x < xnext )
     {
       x0 = x;
@@ -143,40 +151,48 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
       x1 = x;
     }
 
-    /*
-    It's possible for x0 to be negative on the last scanline because of
-    floating-point inaccuracy That would cause an out-of-bounds array access at
-    index -1.
-    */
-    float x0floor = x0 <= 0.0f ? 0.0f : (float)floor( x0 );
 
-    int   x0i    = (int)x0floor;
-    float x1ceil = (float)ceil( x1 );
-    int   x1i    = (int)x1ceil;
+    int   x0i    = x0>>6;
+    FT26D6 x0floor = x0i<<6;
+
+
+    int   x1i    = (x1+0x3f)>>6;
+    FT26D6 x1ceil =  x1i <<6;
+
     if ( x1i <= x0i + 1 )
     {
-      float xmf = 0.5f * ( x + xnext ) - x0floor;
-      m_a[linestart + x0i] += d - d * xmf;
+      FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
+      m_a[linestart + x0i] += d * ((1<<6) - xmf);
       m_a[linestart + ( x0i + 1 )] += d * xmf;
     }
     else
     {
-      float s   = 1.0f / ( x1 - x0 );
-      float x0f = x0 - x0floor;
-      float a0  = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f );
-      float x1f = x1 - x1ceil + 1.0f;
-      float am  = 0.5f * s * x1f * x1f;
+
+      FT26D6 oneOverS = x1 - x0;
+      FT26D6 x0f = x0 - x0floor;
+
+
+      FT26D6 oneMinusX0f = (1<<6) - x0f;
+      FT26D6 a0 = ((oneMinusX0f * oneMinusX0f) >> 1) / oneOverS;
+      FT26D6 x1f = x1 - x1ceil + (1<<6);
+      FT26D6 am = ((x1f * x1f) >> 1) / oneOverS;
+
       m_a[linestart + x0i] += d * a0;
+
       if ( x1i == x0i + 2 )
-        m_a[linestart + ( x0i + 1 )] += d * ( 1.0f - a0 - am );
+        m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am );
       else
       {
-        float a1 = s * ( 1.5f - x0f );
+        FT26D6 a1 = (((1<<6) + (1<<5) - x0f) << 6) / oneOverS;
         m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
-        for ( int xi = x0i + 2; xi < x1i - 1; xi++ )
-          m_a[linestart + xi] += d * s;
-        float a2 = a1 + ( x1i - x0i - 3 ) * s;
-        m_a[linestart + ( x1i - 1 )] += d * ( 1.0f - a2 - am );
+
+        FT26D6 dTimesS = (d << 12) / oneOverS;
+
+        for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ )
+          m_a[linestart + xi] += dTimesS;
+
+        FT26D6 a2 = a1 + (( x1i - x0i - 3 )<<12)/oneOverS;
+        m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am );
       }
       m_a[linestart + x1i] += d * am;
     }
@@ -364,48 +380,49 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
   FT_Error error = FT_Outline_Decompose( &( worker->outline ),
                                          &dense_decompose_funcs, worker );
   // Render into bitmap
-  const float* source = worker->m_a;
+  const FT20D12* source = worker->m_a;
   unsigned char* dest     = target->buffer;
   unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
 
-#if FT_SSE4_1
-
-  __m128 offset = _mm_setzero_ps();
-  __m128i mask = _mm_set1_epi32(0x0c080400);
-  __m128 sign_mask = _mm_set1_ps(-0.f);
-  for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
-    __m128 x = _mm_load_ps(&source[i]);
-    x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
-    x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
-    x = _mm_add_ps(x, offset);
-    __m128 y = _mm_andnot_ps(sign_mask, x);  // fabs(x)
-    y = _mm_min_ps(y, _mm_set1_ps(1.0f));
-    y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
-    __m128i z = _mm_cvtps_epi32(y);
-    z = _mm_shuffle_epi8(z, mask);
-    _mm_store_ss((float *)&dest[i], (__m128)z);
-    offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
-  }
-
-#else /* FT_SSE4_1 */
+//#if FT_SSE4_1
+
+  // __m128 offset = _mm_setzero_ps();
+  // __m128i mask = _mm_set1_epi32(0x0c080400);
+  // __m128 sign_mask = _mm_set1_ps(-0.f);
+  // for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
+  //   __m128 x = _mm_load_ps(&source[i]);
+  //   x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
+  //   x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
+  //   x = _mm_add_ps(x, offset);
+  //   __m128 y = _mm_andnot_ps(sign_mask, x);  // fabs(x)
+  //   y = _mm_min_ps(y, _mm_set1_ps(1.0f));
+  //   y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
+  //   __m128i z = _mm_cvtps_epi32(y);
+  //   z = _mm_shuffle_epi8(z, mask);
+  //   _mm_store_ss((float *)&dest[i], (__m128)z);
+  //   offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
+  // }
+
+//#else /* FT_SSE4_1 */
+
+  FT20D12 value = 0;
 
-  float          value    = 0.0f;
   while ( dest < dest_end )
   {
     value += *source++;
-    if ( value > 0.0f )
-    {
-      int n = (int)( fabs( value ) * 255.0f + 0.5f );
-      if ( n > 255 )
-        n = 255;
+
+    if(value > 0){
+      int n = value >>4;
+
+      if(n>255)n=255;
       *dest = (unsigned char)n;
-    }
-    else
+    }else{
       *dest = 0;
+    }
     dest++;
   }
 
-#endif /* FT_SSE4_1 */
+//#endif /* FT_SSE4_1 */
 
   free(worker->m_a);
   return error;
@@ -444,10 +461,10 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
 
   int size = worker->m_w * worker->m_h + 4;
 
-  worker->m_a      = malloc( sizeof( float ) * size );
+  worker->m_a      = malloc( sizeof( FT20D12 ) * size );
   worker->m_a_size = size;
 
-  memset( worker->m_a, 0, ( sizeof( float ) * size ) );
+  memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) );
   /* exit if nothing to do */
   if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y )
   {
diff --git a/src/dense/ftdense.h b/src/dense/ftdense.h
index 677b2ea33..0af2bb87f 100644
--- a/src/dense/ftdense.h
+++ b/src/dense/ftdense.h
@@ -19,6 +19,10 @@ extern "C"
 {
 #endif
 
+
+  typedef signed long long FT26D6;            /* 26.6 fixed-point representation  */
+  typedef signed long long FT20D12;           /* 20.12 fixed-point representation  */
+
   typedef struct
   {
     /** The array used to store signed area differences. */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]