freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][multiply-shift] 4 commits: [smooth] Detect SSE2 with MSVC for x86


From: Alexei Podtelezhnikov (@apodtele)
Subject: [Git][freetype/freetype][multiply-shift] 4 commits: [smooth] Detect SSE2 with MSVC for x86
Date: Sun, 29 Aug 2021 21:55:23 +0000

Alexei Podtelezhnikov pushed to branch multiply-shift at FreeType / FreeType

Commits:

4 changed files:

Changes:

  • builds/windows/vc2010/freetype.vcxproj
    ... ... @@ -242,7 +242,6 @@
    242 242
           <CompileAs>Default</CompileAs>
    
    243 243
           <DisableSpecificWarnings>4001</DisableSpecificWarnings>
    
    244 244
           <MultiProcessorCompilation>true</MultiProcessorCompilation>
    
    245
    -      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
    
    246 245
           <IntrinsicFunctions>true</IntrinsicFunctions>
    
    247 246
         </ClCompile>
    
    248 247
         <ResourceCompile>
    
    ... ... @@ -296,7 +295,6 @@
    296 295
           <CompileAs>Default</CompileAs>
    
    297 296
           <DisableSpecificWarnings>4001</DisableSpecificWarnings>
    
    298 297
           <MultiProcessorCompilation>true</MultiProcessorCompilation>
    
    299
    -      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
    
    300 298
           <IntrinsicFunctions>true</IntrinsicFunctions>
    
    301 299
         </ClCompile>
    
    302 300
         <ResourceCompile>
    

  • builds/windows/visualc/freetype.vcproj
    ... ... @@ -45,6 +45,7 @@
    45 45
     				PreprocessorDefinitions="NDEBUG;WIN32;_LIB;_CRT_SECURE_NO_WARNINGS;FT2_BUILD_LIBRARY;DLL_EXPORT"
    
    46 46
     				StringPooling="true"
    
    47 47
     				RuntimeLibrary="2"
    
    48
    +				EnableEnhancedInstructionSet="2"
    
    48 49
     				EnableFunctionLevelLinking="true"
    
    49 50
     				DisableLanguageExtensions="true"
    
    50 51
     				WarningLevel="4"
    
    ... ... @@ -121,6 +122,7 @@
    121 122
     				PreprocessorDefinitions="NDEBUG;WIN32;_LIB;_CRT_SECURE_NO_WARNINGS;FT2_BUILD_LIBRARY"
    
    122 123
     				StringPooling="true"
    
    123 124
     				RuntimeLibrary="0"
    
    125
    +				EnableEnhancedInstructionSet="2"
    
    124 126
     				EnableFunctionLevelLinking="true"
    
    125 127
     				DisableLanguageExtensions="true"
    
    126 128
     				WarningLevel="4"
    

  • src/smooth/ftgrays.c
    ... ... @@ -386,12 +386,11 @@ typedef ptrdiff_t FT_PtrDist;
    386 386
       /* divisors to provide sufficient accuracy of the multiply-shift.    */
    
    387 387
       /* It should not exceed (64 - PIXEL_BITS) to prevent overflowing and */
    
    388 388
       /* leave enough room for 64-bit unsigned multiplication however.     */
    
    389
    -#define FT_UDIVPREP( c, b )                                                 \
    
    390
    -  FT_Int64  b ## _r = c ? (FT_Int64)( ~(FT_UInt64)0 >> PIXEL_BITS ) / ( b ) \
    
    389
    +#define FT_UDIVPREP( c, b )                            \
    
    390
    +  FT_Int64  b ## _r = c ? (FT_Int64)0xFFFFFFFF / ( b ) \
    
    391 391
                         : 0
    
    392
    -#define FT_UDIV( a, b )                                         \
    
    393
    -  (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >>    \
    
    394
    -            ( sizeof( FT_UInt64 ) * FT_CHAR_BIT - PIXEL_BITS ) )
    
    392
    +#define FT_UDIV( a, b )                                           \
    
    393
    +  (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >> 32 )
    
    395 394
     
    
    396 395
     
    
    397 396
       /* Scale area and apply fill rule to calculate the coverage byte. */
    
    ... ... @@ -999,10 +998,17 @@ typedef ptrdiff_t FT_PtrDist;
    999 998
        *
    
    1000 999
        * For other cases, using binary splits is actually slightly faster.
    
    1001 1000
        */
    
    1002
    -#if defined( __SSE2__ )    || \
    
    1003
    -    defined( __x86_64__ )  || \
    
    1001
    +#if defined( __SSE2__ )                          || \
    
    1002
    +    defined( __x86_64__ )                        || \
    
    1003
    +    defined( _M_AMD64 )                          || \
    
    1004
    +    ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 )
    
    1005
    +#  define FT_SSE2 1
    
    1006
    +#else
    
    1007
    +#  define FT_SSE2 0
    
    1008
    +#endif
    
    1009
    +
    
    1010
    +#if FT_SSE2                || \
    
    1004 1011
         defined( __aarch64__ ) || \
    
    1005
    -    defined( _M_AMD64 )    || \
    
    1006 1012
         defined( _M_ARM64 )
    
    1007 1013
     #  define BEZIER_USE_DDA  1
    
    1008 1014
     #else
    
    ... ... @@ -1022,7 +1028,7 @@ typedef ptrdiff_t FT_PtrDist;
    1022 1028
     
    
    1023 1029
     #if BEZIER_USE_DDA
    
    1024 1030
     
    
    1025
    -#ifdef __SSE2__
    
    1031
    +#if FT_SSE2
    
    1026 1032
     #  include <emmintrin.h>
    
    1027 1033
     #endif
    
    1028 1034
     
    
    ... ... @@ -1135,7 +1141,7 @@ typedef ptrdiff_t FT_PtrDist;
    1135 1141
          *             = (B << (33 - N)) + (A << (32 - 2*N))
    
    1136 1142
          */
    
    1137 1143
     
    
    1138
    -#ifdef __SSE2__
    
    1144
    +#if FT_SSE2
    
    1139 1145
         /* Experience shows that for small shift values, */
    
    1140 1146
         /* SSE2 is actually slower.                      */
    
    1141 1147
         if ( shift > 2 )
    
    ... ... @@ -1192,7 +1198,7 @@ typedef ptrdiff_t FT_PtrDist;
    1192 1198
     
    
    1193 1199
           return;
    
    1194 1200
         }
    
    1195
    -#endif  /* __SSE2__ */
    
    1201
    +#endif  /* FT_SSE2 */
    
    1196 1202
     
    
    1197 1203
         rx = LEFT_SHIFT( ax, 33 - 2 * shift );
    
    1198 1204
         ry = LEFT_SHIFT( ay, 33 - 2 * shift );
    

  • src/truetype/ttgxvar.c
    ... ... @@ -3164,6 +3164,8 @@
    3164 3164
       /*************************************************************************/
    
    3165 3165
     
    
    3166 3166
     
    
    3167
    +#ifdef TT_CONFIG_OPTION_BYTECODE_INTERPRETER
    
    3168
    +
    
    3167 3169
       static FT_Error
    
    3168 3170
       tt_cvt_ready_iterator( FT_ListNode  node,
    
    3169 3171
                              void*        user )
    
    ... ... @@ -3178,6 +3180,9 @@
    3178 3180
         return FT_Err_Ok;
    
    3179 3181
       }
    
    3180 3182
     
    
    3183
    +#endif /* TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
    
    3184
    +
    
    3185
    +
    
    3181 3186
     
    
    3182 3187
       /**************************************************************************
    
    3183 3188
        *
    
    ... ... @@ -3206,6 +3211,8 @@
    3206 3211
       tt_face_vary_cvt( TT_Face    face,
    
    3207 3212
                         FT_Stream  stream )
    
    3208 3213
       {
    
    3214
    +#ifdef TT_CONFIG_OPTION_BYTECODE_INTERPRETER
    
    3215
    +
    
    3209 3216
         FT_Error   error;
    
    3210 3217
         FT_Memory  memory = stream->memory;
    
    3211 3218
     
    
    ... ... @@ -3526,6 +3533,16 @@
    3526 3533
                          NULL );
    
    3527 3534
     
    
    3528 3535
         return error;
    
    3536
    +
    
    3537
    +#else /* !TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
    
    3538
    +
    
    3539
    +    FT_UNUSED( face );
    
    3540
    +    FT_UNUSED( stream );
    
    3541
    +
    
    3542
    +    return FT_Err_Ok;
    
    3543
    +
    
    3544
    +#endif /* !TT_CONFIG_OPTION_BYTECODE_INTERPRETER */
    
    3545
    +
    
    3529 3546
       }
    
    3530 3547
     
    
    3531 3548
     
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]