freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][gsoc-anurag-2022-final] [dense] Migrate line drawing and accumulation to fixed-point


From: Anurag Thakur (@AdbhutDev)
Subject: [Git][freetype/freetype][gsoc-anurag-2022-final] [dense] Migrate line drawing and accumulation to fixed-point
Date: Sat, 19 Nov 2022 07:29:25 +0000

Anurag Thakur pushed to branch gsoc-anurag-2022-final at FreeType / FreeType

Commits:

  • cd2e6217
    by Anurag Thakur at 2022-11-19T12:58:14+05:30
    [dense] Migrate line drawing and accumulation to fixed-point
    
    * src/dense/ftdense.h (FT26D6, FT20D12): New typedefs.
    
    * src/dense/ftdense.c (dense_render_line, dense_render_glyph): Now
    use fixed-point numbers for calculations.
    
    Disabled SIMD for now
    

2 changed files:

Changes:

  • src/dense/ftdense.c
    ... ... @@ -81,57 +81,65 @@ dense_line_to( const FT_Vector* to, dense_worker* worker )
    81 81
     void
    
    82 82
     dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    
    83 83
     {
    
    84
    -  float from_x = worker->prev_x;
    
    85
    -  float from_y = worker->prev_y;
    
    86
    -  if ( from_y == toy )
    
    87
    -    return;
    
    88 84
     
    
    85
    +  FT26D6 fx = worker->prev_x>>2;
    
    86
    +  FT26D6 fy = worker->prev_y>>2;
    
    87
    +
    
    88
    +  FT26D6 from_x = fx;
    
    89
    +  FT26D6 from_y = fy;
    
    90
    +
    
    91
    +
    
    92
    +  FT26D6 tx = tox>>2;
    
    93
    +  FT26D6 ty = toy>>2;
    
    89 94
     
    
    90
    -  from_x /= 256.0;
    
    91
    -  from_y /= 256.0;
    
    92
    -  float to_x = tox / 256.0;
    
    93
    -  float to_y = toy / 256.0;
    
    95
    +  if ( fy == ty )
    
    96
    +    return;
    
    94 97
     
    
    98
    +  FT26D6 to_x = tx;
    
    99
    +  FT26D6 to_y = ty;
    
    95 100
     
    
    96
    -  float dir;
    
    97
    -  if ( from_y < to_y )
    
    98
    -    dir = 1;
    
    99
    -  else
    
    101
    +  int dir = 1;
    
    102
    +  if ( from_y >= to_y )
    
    100 103
       {
    
    101 104
         dir = -1;
    
    102
    -    FT_SWAP(from_x, to_x );
    
    103
    -    FT_SWAP(from_y, to_y );
    
    105
    +    FT_SWAP(from_x, to_x);
    
    106
    +    FT_SWAP(from_y, to_y);
    
    104 107
       }
    
    105 108
     
    
    106 109
       // Clip to the height.
    
    107
    -  if ( from_y >= worker->m_h || to_y <= 0 )
    
    110
    +  if ( from_y >= worker->m_h<<6 || to_y <= 0 )
    
    108 111
         return;
    
    109 112
     
    
    110
    -  float dxdy = ( to_x - from_x ) / (float)( to_y - from_y );
    
    113
    +  FT26D6 deltax,deltay;
    
    114
    +  deltax = to_x - from_x;
    
    115
    +  deltay = to_y - from_y;
    
    116
    +
    
    111 117
       if ( from_y < 0 )
    
    112 118
       {
    
    113
    -    from_x -= from_y * dxdy;
    
    119
    +    from_x -= from_y * deltax/deltay;
    
    114 120
         from_y = 0;
    
    115 121
       }
    
    116
    -  if ( to_y > worker->m_h )
    
    122
    +
    
    123
    +  if ( to_y > worker->m_h<<6 )
    
    117 124
       {
    
    118
    -    to_x -= ( to_y - worker->m_h ) * dxdy;
    
    119
    -    to_y = (float)worker->m_h;
    
    125
    +    to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay);
    
    126
    +    to_y = worker->m_h<<6;
    
    120 127
       }
    
    121 128
     
    
    122
    -  float  x       = from_x;
    
    123
    -  int    y0      = (int)from_y;
    
    124
    -  int    y_limit = (int)ceil( to_y );
    
    125
    -  float* m_a     = worker->m_a;
    
    129
    +  int    x       = from_x;
    
    130
    +  int    y0      = from_y>>6;
    
    131
    +  int    y_limit = (to_y + 0x3f)>>6;
    
    132
    +
    
    133
    +  FT20D12* m_a     = worker->m_a;
    
    126 134
     
    
    127 135
       for ( int y = y0; y < y_limit; y++ )
    
    128 136
       {
    
    129 137
         int   linestart = y * worker->m_w;
    
    130
    -    float dy        = fmin( y + 1.0f, to_y ) - fmax( (float)y, from_y );
    
    131
    -    float xnext     = x + dxdy * dy;
    
    132
    -    float d         = dy * dir;
    
    138
    +    FT26D6 dy        = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    139
    +    FT26D6 xnext     = x + dy * deltax/deltay;
    
    140
    +    FT26D6 d         = dy * dir;
    
    133 141
     
    
    134
    -    float x0, x1;
    
    142
    +    FT26D6 x0, x1;
    
    135 143
         if ( x < xnext )
    
    136 144
         {
    
    137 145
           x0 = x;
    
    ... ... @@ -143,40 +151,48 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    143 151
           x1 = x;
    
    144 152
         }
    
    145 153
     
    
    146
    -    /*
    
    147
    -    It's possible for x0 to be negative on the last scanline because of
    
    148
    -    floating-point inaccuracy That would cause an out-of-bounds array access at
    
    149
    -    index -1.
    
    150
    -    */
    
    151
    -    float x0floor = x0 <= 0.0f ? 0.0f : (float)floor( x0 );
    
    152 154
     
    
    153
    -    int   x0i    = (int)x0floor;
    
    154
    -    float x1ceil = (float)ceil( x1 );
    
    155
    -    int   x1i    = (int)x1ceil;
    
    155
    +    int   x0i    = x0>>6;
    
    156
    +    FT26D6 x0floor = x0i<<6;
    
    157
    +
    
    158
    +
    
    159
    +    int   x1i    = (x1+0x3f)>>6;
    
    160
    +    FT26D6 x1ceil =  x1i <<6;
    
    161
    +
    
    156 162
         if ( x1i <= x0i + 1 )
    
    157 163
         {
    
    158
    -      float xmf = 0.5f * ( x + xnext ) - x0floor;
    
    159
    -      m_a[linestart + x0i] += d - d * xmf;
    
    164
    +      FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
    
    165
    +      m_a[linestart + x0i] += d * ((1<<6) - xmf);
    
    160 166
           m_a[linestart + ( x0i + 1 )] += d * xmf;
    
    161 167
         }
    
    162 168
         else
    
    163 169
         {
    
    164
    -      float s   = 1.0f / ( x1 - x0 );
    
    165
    -      float x0f = x0 - x0floor;
    
    166
    -      float a0  = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f );
    
    167
    -      float x1f = x1 - x1ceil + 1.0f;
    
    168
    -      float am  = 0.5f * s * x1f * x1f;
    
    170
    +
    
    171
    +      FT26D6 oneOverS = x1 - x0;
    
    172
    +      FT26D6 x0f = x0 - x0floor;
    
    173
    +
    
    174
    +
    
    175
    +      FT26D6 oneMinusX0f = (1<<6) - x0f;
    
    176
    +      FT26D6 a0 = ((oneMinusX0f * oneMinusX0f) >> 1) / oneOverS;
    
    177
    +      FT26D6 x1f = x1 - x1ceil + (1<<6);
    
    178
    +      FT26D6 am = ((x1f * x1f) >> 1) / oneOverS;
    
    179
    +
    
    169 180
           m_a[linestart + x0i] += d * a0;
    
    181
    +
    
    170 182
           if ( x1i == x0i + 2 )
    
    171
    -        m_a[linestart + ( x0i + 1 )] += d * ( 1.0f - a0 - am );
    
    183
    +        m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am );
    
    172 184
           else
    
    173 185
           {
    
    174
    -        float a1 = s * ( 1.5f - x0f );
    
    186
    +        FT26D6 a1 = (((1<<6) + (1<<5) - x0f) << 6) / oneOverS;
    
    175 187
             m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
    
    176
    -        for ( int xi = x0i + 2; xi < x1i - 1; xi++ )
    
    177
    -          m_a[linestart + xi] += d * s;
    
    178
    -        float a2 = a1 + ( x1i - x0i - 3 ) * s;
    
    179
    -        m_a[linestart + ( x1i - 1 )] += d * ( 1.0f - a2 - am );
    
    188
    +
    
    189
    +        FT26D6 dTimesS = (d << 12) / oneOverS;
    
    190
    +
    
    191
    +        for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ )
    
    192
    +          m_a[linestart + xi] += dTimesS;
    
    193
    +
    
    194
    +        FT26D6 a2 = a1 + (( x1i - x0i - 3 )<<12)/oneOverS;
    
    195
    +        m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am );
    
    180 196
           }
    
    181 197
           m_a[linestart + x1i] += d * am;
    
    182 198
         }
    
    ... ... @@ -364,48 +380,49 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
    364 380
       FT_Error error = FT_Outline_Decompose( &( worker->outline ),
    
    365 381
                                              &dense_decompose_funcs, worker );
    
    366 382
       // Render into bitmap
    
    367
    -  const float* source = worker->m_a;
    
    383
    +  const FT20D12* source = worker->m_a;
    
    368 384
       unsigned char* dest     = target->buffer;
    
    369 385
       unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
    
    370 386
     
    
    371
    -#if FT_SSE4_1
    
    372
    -
    
    373
    -  __m128 offset = _mm_setzero_ps();
    
    374
    -  __m128i mask = _mm_set1_epi32(0x0c080400);
    
    375
    -  __m128 sign_mask = _mm_set1_ps(-0.f);
    
    376
    -  for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
    
    377
    -    __m128 x = _mm_load_ps(&source[i]);
    
    378
    -    x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
    
    379
    -    x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
    
    380
    -    x = _mm_add_ps(x, offset);
    
    381
    -    __m128 y = _mm_andnot_ps(sign_mask, x);  // fabs(x)
    
    382
    -    y = _mm_min_ps(y, _mm_set1_ps(1.0f));
    
    383
    -    y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
    
    384
    -    __m128i z = _mm_cvtps_epi32(y);
    
    385
    -    z = _mm_shuffle_epi8(z, mask);
    
    386
    -    _mm_store_ss((float *)&dest[i], (__m128)z);
    
    387
    -    offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
    
    388
    -  }
    
    389
    -
    
    390
    -#else /* FT_SSE4_1 */
    
    387
    +//#if FT_SSE4_1
    
    388
    +
    
    389
    +  // __m128 offset = _mm_setzero_ps();
    
    390
    +  // __m128i mask = _mm_set1_epi32(0x0c080400);
    
    391
    +  // __m128 sign_mask = _mm_set1_ps(-0.f);
    
    392
    +  // for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
    
    393
    +  //   __m128 x = _mm_load_ps(&source[i]);
    
    394
    +  //   x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
    
    395
    +  //   x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
    
    396
    +  //   x = _mm_add_ps(x, offset);
    
    397
    +  //   __m128 y = _mm_andnot_ps(sign_mask, x);  // fabs(x)
    
    398
    +  //   y = _mm_min_ps(y, _mm_set1_ps(1.0f));
    
    399
    +  //   y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
    
    400
    +  //   __m128i z = _mm_cvtps_epi32(y);
    
    401
    +  //   z = _mm_shuffle_epi8(z, mask);
    
    402
    +  //   _mm_store_ss((float *)&dest[i], (__m128)z);
    
    403
    +  //   offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
    
    404
    +  // }
    
    405
    +
    
    406
    +//#else /* FT_SSE4_1 */
    
    407
    +
    
    408
    +  FT20D12 value = 0;
    
    391 409
     
    
    392
    -  float          value    = 0.0f;
    
    393 410
       while ( dest < dest_end )
    
    394 411
       {
    
    395 412
         value += *source++;
    
    396
    -    if ( value > 0.0f )
    
    397
    -    {
    
    398
    -      int n = (int)( fabs( value ) * 255.0f + 0.5f );
    
    399
    -      if ( n > 255 )
    
    400
    -        n = 255;
    
    413
    +
    
    414
    +    if(value > 0){
    
    415
    +      int n = value >>4;
    
    416
    +
    
    417
    +      if(n>255)n=255;
    
    401 418
           *dest = (unsigned char)n;
    
    402
    -    }
    
    403
    -    else
    
    419
    +    }else{
    
    404 420
           *dest = 0;
    
    421
    +    }
    
    405 422
         dest++;
    
    406 423
       }
    
    407 424
     
    
    408
    -#endif /* FT_SSE4_1 */
    
    425
    +//#endif /* FT_SSE4_1 */
    
    409 426
     
    
    410 427
       free(worker->m_a);
    
    411 428
       return error;
    
    ... ... @@ -444,10 +461,10 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    444 461
     
    
    445 462
       int size = worker->m_w * worker->m_h + 4;
    
    446 463
     
    
    447
    -  worker->m_a      = malloc( sizeof( float ) * size );
    
    464
    +  worker->m_a      = malloc( sizeof( FT20D12 ) * size );
    
    448 465
       worker->m_a_size = size;
    
    449 466
     
    
    450
    -  memset( worker->m_a, 0, ( sizeof( float ) * size ) );
    
    467
    +  memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) );
    
    451 468
       /* exit if nothing to do */
    
    452 469
       if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y )
    
    453 470
       {
    

  • src/dense/ftdense.h
    ... ... @@ -19,6 +19,10 @@ extern "C"
    19 19
     {
    
    20 20
     #endif
    
    21 21
     
    
    22
    +
    
    23
    +  typedef signed long long FT26D6;            /* 26.6 fixed-point representation  */
    
    24
    +  typedef signed long long FT20D12;           /* 20.12 fixed-point representation  */
    
    25
    +
    
    22 26
       typedef struct
    
    23 27
       {
    
    24 28
         /** The array used to store signed area differences. */
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]