freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][gsoc-anurag-2022] Add SIMD


From: Anurag Thakur (@AdbhutDev)
Subject: [Git][freetype/freetype][gsoc-anurag-2022] Add SIMD
Date: Thu, 15 Sep 2022 19:15:55 +0000

Anurag Thakur pushed to branch gsoc-anurag-2022 at FreeType / FreeType

Commits:

  • d03fa45e
    by Anurag Thakur at 2022-09-16T00:45:17+05:30
    Add SIMD
    

3 changed files:

Changes:

  • .vscode/settings.json
    ... ... @@ -2,6 +2,7 @@
    2 2
         "files.associations": {
    
    3 3
             "ftoutln.h": "c",
    
    4 4
             "svprop.h": "c",
    
    5
    -        "ftdebug.h": "c"
    
    5
    +        "ftdebug.h": "c",
    
    6
    +        "tmmintrin.h": "c"
    
    6 7
         }
    
    7
    -}
    \ No newline at end of file
    8
    +}

  • src/dense/ftdense.c
    ... ... @@ -11,6 +11,7 @@
    11 11
     #include "ftdense.h"
    
    12 12
     
    
    13 13
     #include <math.h>
    
    14
    +#include <tmmintrin.h>
    
    14 15
     #include "ftdenseerrs.h"
    
    15 16
     
    
    16 17
     #define PIXEL_BITS 8
    
    ... ... @@ -372,22 +373,40 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
    372 373
     
    
    373 374
       unsigned char* dest     = target->buffer;
    
    374 375
       unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
    
    375
    -  float          value    = 0.0f;
    
    376
    -  while ( dest < dest_end )
    
    377
    -  {
    
    378
    -    value += *source++;
    
    379
    -    if ( value > 0.0f )
    
    380
    -    {
    
    381
    -      int n = (int)( fabs( value ) * 255.0f + 0.5f );
    
    382
    -      if ( n > 255 )
    
    383
    -        n = 255;
    
    384
    -      *dest = (unsigned char)n;
    
    385
    -    }
    
    386
    -    else
    
    387
    -      *dest = 0;
    
    388
    -    dest++;
    
    376
    +
    
    377
    +  __m128 offset = _mm_setzero_ps();
    
    378
    +  __m128i mask = _mm_set1_epi32(0x0c080400);
    
    379
    +  __m128 sign_mask = _mm_set1_ps(-0.f);
    
    380
    +  for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
    
    381
    +    __m128 x = _mm_load_ps(&source[i]);
    
    382
    +    x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
    
    383
    +    x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
    
    384
    +    x = _mm_add_ps(x, offset);
    
    385
    +    __m128 y = _mm_andnot_ps(sign_mask, x);  // fabs(x)
    
    386
    +    y = _mm_min_ps(y, _mm_set1_ps(1.0f));
    
    387
    +    y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
    
    388
    +    __m128i z = _mm_cvtps_epi32(y);
    
    389
    +    z = _mm_shuffle_epi8(z, mask);
    
    390
    +    _mm_store_ss((float *)&dest[i], (__m128)z);
    
    391
    +    offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
    
    389 392
       }
    
    390 393
     
    
    394
    +  // float          value    = 0.0f;
    
    395
    +  // while ( dest < dest_end )
    
    396
    +  // {
    
    397
    +  //   value += *source++;
    
    398
    +  //   if ( value > 0.0f )
    
    399
    +  //   {
    
    400
    +  //     int n = (int)( fabs( value ) * 255.0f + 0.5f );
    
    401
    +  //     if ( n > 255 )
    
    402
    +  //       n = 255;
    
    403
    +  //     *dest = (unsigned char)n;
    
    404
    +  //   }
    
    405
    +  //   else
    
    406
    +  //     *dest = 0;
    
    407
    +  //   dest++;
    
    408
    +  // }
    
    409
    +
    
    391 410
       free(worker->m_a);
    
    392 411
       return error;
    
    393 412
     }
    

  • src/dense/rules.mk
    ... ... @@ -22,8 +22,9 @@ DENSE_DIR := $(SRC_DIR)/dense
    22 22
     #
    
    23 23
     DENSE_COMPILE := $(CC) $(ANSIFLAGS)                               \
    
    24 24
                             $I$(subst /,$(COMPILER_SEP),$(DENSE_DIR)) \
    
    25
    -                        $(INCLUDE_FLAGS)                           \
    
    26
    -                        $(FT_CFLAGS)
    
    25
    +                        $(INCLUDE_FLAGS)                          \
    
    26
    +                        $(FT_CFLAGS)                              \
    
    27
    +                        "-msse4.1"
    
    27 28
     
    
    28 29
     
    
    29 30
     # DENSE driver sources (i.e., C files)
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]