freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][gsoc-anurag-2023-final] 11 commits: [dense] Add


From: Anurag Thakur (@AdbhutDev)
Subject: [Git][freetype/freetype][gsoc-anurag-2023-final] 11 commits: [dense] Add -msse4.1 to compile with CMake
Date: Mon, 09 Oct 2023 22:16:52 +0000

Anurag Thakur pushed to branch gsoc-anurag-2023-final at FreeType / FreeType

Commits:

  • cf779f5f
    by Anurag Thakur at 2023-10-10T03:15:13+05:30
    [dense] Add -msse4.1 to compile with CMake
    
    * CMakeLists.txt: Add -msse4.1 to ${CMAKE_C_FLAGS}
    
  • 1bf65eff
    by Anurag Thakur at 2023-10-10T03:17:34+05:30
    [dense] Add compilation fixes for meson
    
    * builds/meson/parse_modules_cfg.py: Add dense module
    
    * meson.build: Add SSE flags to ft2_defines, -lm to link_args
    
    * src/dense/ftdense.c: Fixes for compilation errors
    
  • 3b3c4662
    by Anurag Thakur at 2023-10-10T03:20:48+05:30
    [dense] Add ARM NEON support and improve SSE perf
    
    * src/dense/ftdense.c: Add FT_NEON flag, implement ARM NEON support
    in dense_render_glyph, improve SSE performance
    
    * src/dense/rules.mk: Replace -msse4.1 with -march=native
    
  • 45de1dba
    by Anurag Thakur at 2023-10-10T03:24:09+05:30
    [dense] Declare FT_New_Face2
    
    * include/freetype/freetype.h: Add FT_New_Face2 function to be used
    for preloading optimization
    
  • c33e0b82
    by Anurag Thakur at 2023-10-10T03:26:28+05:30
    [dense] Add FT_PreLine struct
    
    * include/freetype/freetype.h: Add FT_PreLineRec struct and its handle FT_PreLine
    
  • d074c39c
    by Anurag Thakur at 2023-10-10T03:28:13+05:30
    [dense] Modified FT_FaceRec, FT_GlyphSlotRec and FT_Raster_Params
    
    * include/freetype/freetype.h: Add glyph_array field to FT_FaceRec,
    prelines, prel_shifted fields to FT_GlyphSlotRec
    
    * include/freetype/ftimage.h: Add prelines field to FT_Raster_Params
    
  • e7c4fb9d
    by Anurag Thakur at 2023-10-10T03:31:03+05:30
    [dense] Add FT_Refresh_Glyph
    
    * include/freetype/freetype.h: Declare FT_Refresh_Glyph
    
    * src/base/ftobjs.c: Implement FT_Refresh_Glyph
    
  • b2f570a2
    by Anurag Thakur at 2023-10-10T03:35:20+05:30
    [dense] Implement FT_New_Face2 and fix glyph loading
    
    * include/freetype/freetype.h: Add field "size" to FT_Open_Args
    
    * src/base/ftobjs.c: Use slot from face's glyph_array in FT_Load_Glyph;
    implement FT_New_Face2
    
  • 2254ce19
    by Anurag Thakur at 2023-10-10T03:38:02+05:30
    [dense] Add code for curve flattening at load time
    
    * src/base/ftobjs.c: Add Lerp, conic_to2, ft_decompose_outline functions
    
  • c1804c48
    by Anurag Thakur at 2023-10-10T03:39:47+05:30
    [dense] Add support for preloading in ft_open_face_internal
    
    * src/base/ftobjs.c: Add code for loading glyph data into
    face->glyph_array after ft_open_face_internal has succeeded
    
  • b1f9f98e
    by Anurag Thakur at 2023-10-10T03:41:29+05:30
    [dense] Add support for rendering prelines
    
    * src/dense/ftdense.c: Add dense_render_line2 function, that takes
    a PreLine as argument.
    
    FT_Outline_Decompose call in dense_render_glyph replaced by a loop
    that renders PreLines
    
    * src/dense/ftdenserend.c: Add support for shifting PreLines as
    per target bitmap
    

9 changed files:

Changes:

  • CMakeLists.txt
    ... ... @@ -247,6 +247,8 @@ if (BUILD_FRAMEWORK)
    247 247
     endif ()
    
    248 248
     
    
    249 249
     
    
    250
    +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
    
    251
    +
    
    250 252
     # Find dependencies
    
    251 253
     include(FindPkgConfig)
    
    252 254
     
    

  • builds/meson/parse_modules_cfg.py
    ... ... @@ -87,6 +87,7 @@ def generate_ftmodule(lists):
    87 87
             names = {
    
    88 88
                 "raster": ("ft_raster1",),
    
    89 89
                 "smooth": ("ft_smooth",),
    
    90
    +            "dense": ("ft_dense",),
    
    90 91
                 "svg": ("ft_svg",),
    
    91 92
                 "sdf": ("ft_sdf", "ft_bitmap_sdf"),
    
    92 93
             }.get(module)
    

  • include/freetype/freetype.h
    ... ... @@ -1276,6 +1276,7 @@ FT_BEGIN_HEADER
    1276 1276
         FT_ListRec        sizes_list;
    
    1277 1277
     
    
    1278 1278
         FT_Generic        autohint;   /* face-specific auto-hinter data */
    
    1279
    +    FT_GlyphSlot*      glyph_array;
    
    1279 1280
         void*             extensions; /* unused                         */
    
    1280 1281
     
    
    1281 1282
         FT_Face_Internal  internal;
    
    ... ... @@ -1283,6 +1284,44 @@ FT_BEGIN_HEADER
    1283 1284
       } FT_FaceRec;
    
    1284 1285
     
    
    1285 1286
     
    
    1287
    +
    
    1288
    +  /**************************************************************************
    
    1289
    +   *
    
    1290
    +   * @type:
    
    1291
    +   *   FT_PreLine
    
    1292
    +   *
    
    1293
    +   * @description:
    
    1294
    +   *   A handle to FT_PreLineRec_ containing coordinates of start and end
    
    1295
    +   *   points for a line.
    
    1296
    +   *
    
    1297
    +   */
    
    1298
    +  typedef struct FT_PreLineRec_* FT_PreLine;
    
    1299
    +
    
    1300
    +  /**************************************************************************
    
    1301
    +   *
    
    1302
    +   * @struct:
    
    1303
    +   *   FT_PreLineRec
    
    1304
    +   *
    
    1305
    +   * @description:
    
    1306
    +   *   Linked list containing lines to be drawn for a glyph.
    
    1307
    +   *
    
    1308
    +   * @fields:
    
    1309
    +   *   x1, y1 ::
    
    1310
    +   *     Coordinates of line start point.
    
    1311
    +   *
    
    1312
    +   *   x2, y2 ::
    
    1313
    +   *     Coordinates of line end point.
    
    1314
    +   *
    
    1315
    +   *   next ::
    
    1316
    +   *     The next PreLine for current glyph
    
    1317
    +   *
    
    1318
    +   */
    
    1319
    +  typedef struct FT_PreLineRec_
    
    1320
    +  {
    
    1321
    +    int x1, x2, y1, y2;
    
    1322
    +    FT_PreLine next;
    
    1323
    +  } FT_PreLineRec;
    
    1324
    +
    
    1286 1325
       /**************************************************************************
    
    1287 1326
        *
    
    1288 1327
        * @enum:
    
    ... ... @@ -2171,6 +2210,12 @@ FT_BEGIN_HEADER
    2171 2210
        *   other ::
    
    2172 2211
        *     Reserved.
    
    2173 2212
        *
    
    2213
    +   *   prelines ::
    
    2214
    +   *     Linked list containing lines to be drawn for the glyph
    
    2215
    +   *
    
    2216
    +   *   prel_shifted ::
    
    2217
    +   *     If the points in preline have been adjusted according to target bitmap
    
    2218
    +   *
    
    2174 2219
        *   lsb_delta ::
    
    2175 2220
        *     The difference between hinted and unhinted left side bearing while
    
    2176 2221
        *     auto-hinting is active.  Zero otherwise.
    
    ... ... @@ -2288,6 +2333,8 @@ FT_BEGIN_HEADER
    2288 2333
         FT_Pos            rsb_delta;
    
    2289 2334
     
    
    2290 2335
         void*             other;
    
    2336
    +    FT_PreLine        prelines;
    
    2337
    +    int               prel_shifted;
    
    2291 2338
     
    
    2292 2339
         FT_Slot_Internal  internal;
    
    2293 2340
     
    
    ... ... @@ -2487,6 +2534,10 @@ FT_BEGIN_HEADER
    2487 2534
        *   params ::
    
    2488 2535
        *     Extra parameters passed to the font driver when opening a new face.
    
    2489 2536
        *
    
    2537
    +   *   size ::
    
    2538
    +   *     Size at which the glyphs will be rendered. Use same value as
    
    2539
    +   *     @FT_Set_Pixel_Sizes
    
    2540
    +   *
    
    2490 2541
        * @note:
    
    2491 2542
        *   The stream type is determined by the contents of `flags`:
    
    2492 2543
        *
    
    ... ... @@ -2524,6 +2575,7 @@ FT_BEGIN_HEADER
    2524 2575
         FT_Module       driver;
    
    2525 2576
         FT_Int          num_params;
    
    2526 2577
         FT_Parameter*   params;
    
    2578
    +    FT_UInt         size;
    
    2527 2579
     
    
    2528 2580
       } FT_Open_Args;
    
    2529 2581
     
    
    ... ... @@ -2573,6 +2625,54 @@ FT_BEGIN_HEADER
    2573 2625
                    FT_Face     *aface );
    
    2574 2626
     
    
    2575 2627
     
    
    2628
    +/**************************************************************************
    
    2629
    +   *
    
    2630
    +   * @function:
    
    2631
    +   *   FT_New_Face2
    
    2632
    +   *
    
    2633
    +   * @description:
    
    2634
    +   *   Call @FT_Open_Face to open a font by its pathname with given flags.
    
    2635
    +   *
    
    2636
    +   * @inout:
    
    2637
    +   *   library ::
    
    2638
    +   *     A handle to the library resource.
    
    2639
    +   *
    
    2640
    +   * @input:
    
    2641
    +   *   pathname ::
    
    2642
    +   *     A path to the font file.
    
    2643
    +   *
    
    2644
    +   *   face_index ::
    
    2645
    +   *     See @FT_Open_Face for a detailed description of this parameter.
    
    2646
    +   *
    
    2647
    +   *   size ::
    
    2648
    +   *     Size at which glyphs will be rendered.  Use the same value as @FT_Set_Pixel_Sizes
    
    2649
    +   *
    
    2650
    +   * @output:
    
    2651
    +   *   aface ::
    
    2652
    +   *     A handle to a new face object.  If `face_index` is greater than or
    
    2653
    +   *     equal to zero, it must be non-`NULL`.
    
    2654
    +   *
    
    2655
    +   * @return:
    
    2656
    +   *   FreeType error code.  0~means success.
    
    2657
    +   *
    
    2658
    +   * @note:
    
    2659
    +   *   The `pathname` string should be recognizable as such by a standard
    
    2660
    +   *   `fopen` call on your system; in particular, this means that `pathname`
    
    2661
    +   *   must not contain null bytes.  If that is not sufficient to address all
    
    2662
    +   *   file name possibilities (for example, to handle wide character file
    
    2663
    +   *   names on Windows in UTF-16 encoding) you might use @FT_Open_Face to
    
    2664
    +   *   pass a memory array or a stream object instead.
    
    2665
    +   *
    
    2666
    +   *   Use @FT_Done_Face to destroy the created @FT_Face object (along with
    
    2667
    +   *   its slot and sizes).
    
    2668
    +   */
    
    2669
    +  FT_EXPORT( FT_Error )
    
    2670
    +  FT_New_Face2( FT_Library   library,
    
    2671
    +               const char*  filepathname,
    
    2672
    +               FT_Long      face_index,
    
    2673
    +               FT_Face     *aface,
    
    2674
    +               FT_UInt      size);
    
    2675
    +
    
    2576 2676
       /**************************************************************************
    
    2577 2677
        *
    
    2578 2678
        * @function:
    
    ... ... @@ -3228,6 +3328,31 @@ FT_BEGIN_HEADER
    3228 3328
                      FT_UInt   glyph_index,
    
    3229 3329
                      FT_Int32  load_flags );
    
    3230 3330
     
    
    3331
    +  /**************************************************************************
    
    3332
    +   *
    
    3333
    +   * @function:
    
    3334
    +   *   FT_Refresh_Glyph
    
    3335
    +   *
    
    3336
    +   * @description:
    
    3337
    +   *   Prepare the glyph at glyph_index for rendering. Resets the glyph
    
    3338
    +   *   if it has already been rendered
    
    3339
    +   *
    
    3340
    +   * @inout:
    
    3341
    +   *   face ::
    
    3342
    +   *     A handle to the target face object where the glyph is loaded.
    
    3343
    +   *
    
    3344
    +   * @input:
    
    3345
    +   *   glyph_index ::
    
    3346
    +   *     The index of the glyph in the font file.
    
    3347
    +   *
    
    3348
    +   * @return:
    
    3349
    +   *   FreeType error code.  0~means success.
    
    3350
    +   *
    
    3351
    +   */
    
    3352
    +  FT_EXPORT( FT_Error )
    
    3353
    +  FT_Refresh_Glyph( FT_Face   face,
    
    3354
    +                    FT_UInt   glyph_index);
    
    3355
    +
    
    3231 3356
     
    
    3232 3357
       /**************************************************************************
    
    3233 3358
        *
    

  • include/freetype/ftimage.h
    ... ... @@ -1030,6 +1030,9 @@ FT_BEGIN_HEADER
    1030 1030
        *     An optional span clipping box expressed in _integer_ pixels
    
    1031 1031
        *     (not in 26.6 fixed-point units).
    
    1032 1032
        *
    
    1033
    +   *   prelines ::
    
    1034
    +   *     Pointer of type FT_PreLine, containing line data for a glyph
    
    1035
    +   *
    
    1033 1036
        * @note:
    
    1034 1037
        *   The @FT_RASTER_FLAG_AA bit flag must be set in the `flags` to
    
    1035 1038
        *   generate an anti-aliased glyph bitmap, otherwise a monochrome bitmap
    
    ... ... @@ -1059,6 +1062,7 @@ FT_BEGIN_HEADER
    1059 1062
         FT_Raster_BitSet_Func   bit_set;      /* unused */
    
    1060 1063
         void*                   user;
    
    1061 1064
         FT_BBox                 clip_box;
    
    1065
    +    void*                   prelines;
    
    1062 1066
     
    
    1063 1067
       } FT_Raster_Params;
    
    1064 1068
     
    

  • meson.build
    ... ... @@ -395,6 +395,13 @@ if use_unix_ftsystem_c
    395 395
     endif
    
    396 396
     
    
    397 397
     
    
    398
    +if cc.get_id() == 'msvc'
    
    399
    +  ft2_defines += ['/arch:AVX']
    
    400
    +else
    
    401
    +  ft2_defines += ['-msse4.1']
    
    402
    +endif
    
    403
    +
    
    404
    +
    
    398 405
     ft2_lib = library('freetype',
    
    399 406
       sources: ft2_sources + [ftmodule_h],
    
    400 407
       c_args: ft2_defines,
    
    ... ... @@ -403,7 +410,7 @@ ft2_lib = library('freetype',
    403 410
       dependencies: ft2_deps,
    
    404 411
       install: true,
    
    405 412
       version: ft2_so_version,
    
    406
    -  link_args: common_ldflags,
    
    413
    +  link_args: common_ldflags + ['-lm'],
    
    407 414
     )
    
    408 415
     
    
    409 416
     
    

  • src/base/ftobjs.c
    ... ... @@ -42,6 +42,7 @@
    42 42
     #include <freetype/internal/services/svkern.h>
    
    43 43
     #include <freetype/internal/services/svtteng.h>
    
    44 44
     
    
    45
    +#include <math.h>
    
    45 46
     #include <freetype/ftdriver.h>
    
    46 47
     
    
    47 48
     #ifdef FT_CONFIG_OPTION_MAC_FONTS
    
    ... ... @@ -893,6 +894,14 @@
    893 894
     
    
    894 895
     
    
    895 896
       /* documentation is in freetype.h */
    
    897
    +  FT_EXPORT_DEF( FT_Error )
    
    898
    +  FT_Refresh_Glyph( FT_Face   face,
    
    899
    +                 FT_UInt   glyph_index)
    
    900
    +
    
    901
    +  {
    
    902
    +    ft_glyphslot_free_bitmap( face->glyph_array[glyph_index] );
    
    903
    +    face->glyph_array[glyph_index]->format = FT_GLYPH_FORMAT_OUTLINE;
    
    904
    +  }
    
    896 905
     
    
    897 906
       FT_EXPORT_DEF( FT_Error )
    
    898 907
       FT_Load_Glyph( FT_Face   face,
    
    ... ... @@ -914,7 +923,7 @@
    914 923
         /* The validity test for `glyph_index' is performed by the */
    
    915 924
         /* font drivers.                                           */
    
    916 925
     
    
    917
    -    slot = face->glyph;
    
    926
    +    slot = face->glyph_array[glyph_index];
    
    918 927
         ft_glyphslot_clear( slot );
    
    919 928
     
    
    920 929
         driver  = face->driver;
    
    ... ... @@ -1616,12 +1625,34 @@
    1616 1625
           return FT_THROW( Invalid_Argument );
    
    1617 1626
     
    
    1618 1627
         args.flags    = FT_OPEN_PATHNAME;
    
    1628
    +    args.size     = 0;
    
    1619 1629
         args.pathname = (char*)pathname;
    
    1620 1630
         args.stream   = NULL;
    
    1621 1631
     
    
    1622 1632
         return ft_open_face_internal( library, &args, face_index, aface, 1 );
    
    1623 1633
       }
    
    1624 1634
     
    
    1635
    +  FT_EXPORT_DEF( FT_Error )
    
    1636
    +  FT_New_Face2( FT_Library   library,
    
    1637
    +                const char*  pathname,
    
    1638
    +                FT_Long      face_index,
    
    1639
    +                FT_Face     *aface,
    
    1640
    +                FT_UInt      size)
    
    1641
    +  {
    
    1642
    +    FT_Open_Args  args;
    
    1643
    +
    
    1644
    +     /* test for valid `library' and `aface' delayed to `FT_Open_Face' */
    
    1645
    +     if ( !pathname )
    
    1646
    +       return FT_THROW( Invalid_Argument );
    
    1647
    +
    
    1648
    +     args.flags    = FT_OPEN_PATHNAME;
    
    1649
    +     args.size     = size;
    
    1650
    +     args.pathname = (char*)pathname;
    
    1651
    +     args.stream   = NULL;
    
    1652
    +
    
    1653
    +     return ft_open_face_internal( library, &args, face_index, aface, 1 );
    
    1654
    +  }
    
    1655
    +
    
    1625 1656
     #endif
    
    1626 1657
     
    
    1627 1658
     
    
    ... ... @@ -2519,6 +2550,306 @@
    2519 2550
       }
    
    2520 2551
     
    
    2521 2552
     
    
    2553
    +static FT_Vector
    
    2554
    +Lerp( float T, FT_Vector P0, FT_Vector P1 )
    
    2555
    +{
    
    2556
    +  FT_Vector p;
    
    2557
    +  p.x = P0.x + T * ( P1.x - P0.x );
    
    2558
    +  p.y = P0.y + T * ( P1.y - P0.y );
    
    2559
    +  return p;
    
    2560
    +}
    
    2561
    +
    
    2562
    +int conic_to2(FT_GlyphSlot* slot, FT_Vector *control, FT_Vector *from, FT_Vector *to, FT_PreLine *ptr)
    
    2563
    +{
    
    2564
    +  /*
    
    2565
    +  Calculate devsq as the square of four times the
    
    2566
    +  distance from the control point to the midpoint of the curve.
    
    2567
    +  This is the place at which the curve is furthest from the
    
    2568
    +  line joining the control points.
    
    2569
    +
    
    2570
    +  4 x point on curve = p0 + 2p1 + p2
    
    2571
    +  4 x midpoint = 4p1
    
    2572
    +
    
    2573
    +  The division by four is omitted to save time.
    
    2574
    +  */
    
    2575
    +  FT_Vector aP0 = { from->x , from->y};
    
    2576
    +  FT_Vector aP1 = { control->x, control->y };
    
    2577
    +  FT_Vector aP2 = { to->x, to->y };
    
    2578
    +
    
    2579
    +  float devx  = aP0.x - aP1.x - aP1.x + aP2.x;
    
    2580
    +  float devy  = aP0.y - aP1.y - aP1.y + aP2.y;
    
    2581
    +  float devsq = devx * devx + devy * devy;
    
    2582
    +
    
    2583
    +  if ( devsq < 0.333f )
    
    2584
    +  {
    
    2585
    +    FT_PreLine pl3       = malloc(sizeof(FT_PreLineRec));
    
    2586
    +            pl3->x1      = (*ptr)->x2;
    
    2587
    +            pl3->y1      = (*ptr)->y2;
    
    2588
    +            pl3->x2      = aP2.x;
    
    2589
    +            pl3->y2      = aP2.y;
    
    2590
    +            pl3->next    = NULL;
    
    2591
    +            (*ptr)->next = pl3;
    
    2592
    +            *ptr         = (*ptr)->next;
    
    2593
    +    return 0;
    
    2594
    +  }
    
    2595
    +
    
    2596
    +  /*
    
    2597
    +  According to Raph Levien, the reason for the subdivision by n (instead of
    
    2598
    +  recursive division by the Casteljau system) is that "I expect the flatness
    
    2599
    +  computation to be semi-expensive (it's done once rather than on each potential
    
    2600
    +  subdivision) and also because you'll often get fewer subdivisions. Taking a
    
    2601
    +  circular arc as a simplifying assumption, where I get n, a recursive approach
    
    2602
    +  would get 2^ceil(lg n), which, if I haven't made any horrible mistakes, is
    
    2603
    +  expected to be 33% more in the limit".
    
    2604
    +  */
    
    2605
    +
    
    2606
    +  const float tol = 3.0f;
    
    2607
    +  int         n   = (int)floor( sqrt( sqrt( tol * devsq ) ) )/8;
    
    2608
    +  FT_Vector p      = aP0;
    
    2609
    +  float     nrecip = 1.0f / ( n + 1.0f );
    
    2610
    +  float     t      = 0.0f;
    
    2611
    +  for ( int i = 0; i < n; i++ )
    
    2612
    +  {
    
    2613
    +    t += nrecip;
    
    2614
    +    FT_Vector next = Lerp( t, Lerp( t, aP0, aP1 ), Lerp( t, aP1, aP2 ) );
    
    2615
    +    FT_PreLine pl4  = malloc(sizeof(FT_PreLineRec));
    
    2616
    +            pl4->x1       = (*ptr)->x2;
    
    2617
    +            pl4->y1       = (*ptr)->y2;
    
    2618
    +            pl4->x2       = next.x;
    
    2619
    +            pl4->y2       = next.y;
    
    2620
    +            pl4->next     = NULL;
    
    2621
    +            (*ptr)->next  = pl4;
    
    2622
    +            *ptr          = (*ptr)->next;
    
    2623
    +            p              = next;
    
    2624
    +  }
    
    2625
    +
    
    2626
    +  FT_PreLine pl5          = malloc(sizeof(FT_PreLineRec));
    
    2627
    +            pl5->x1       = (*ptr)->x2;
    
    2628
    +            pl5->y1       = (*ptr)->y2;
    
    2629
    +            pl5->x2       = aP2.x;
    
    2630
    +            pl5->y2       = aP2.y;
    
    2631
    +            pl5->next     = NULL;
    
    2632
    +            (*ptr)->next  = pl5;
    
    2633
    +            *ptr          = (*ptr)->next;
    
    2634
    +  return 0;
    
    2635
    +}
    
    2636
    +
    
    2637
    +/**
    
    2638
    + * Convert the outline data of slot to prelines
    
    2639
    +*/
    
    2640
    +FT_Error ft_decompose_outline(FT_GlyphSlot* slot){
    
    2641
    +  FT_Vector   v_last;
    
    2642
    +  FT_Vector   v_control;
    
    2643
    +  FT_Vector   v_start;
    
    2644
    +
    
    2645
    +  FT_Vector*  point;
    
    2646
    +  FT_Vector*  limit;
    
    2647
    +  char*       tags;
    
    2648
    +
    
    2649
    +  FT_Error    error;
    
    2650
    +
    
    2651
    +  FT_Int   n;         /* index of contour in outline     */
    
    2652
    +  FT_Int   first;     /* index of first point in contour */
    
    2653
    +  FT_Int   last;      /* index of last point in contour  */
    
    2654
    +
    
    2655
    +  FT_Int   tag;       /* current point's state           */
    
    2656
    +
    
    2657
    +  FT_Int   shift;
    
    2658
    +  FT_Pos   delta;
    
    2659
    +
    
    2660
    +  FT_Outline* outline = &(*slot)->outline;
    
    2661
    +
    
    2662
    +  if ( !outline )
    
    2663
    +    return FT_THROW( Invalid_Outline );
    
    2664
    +  
    
    2665
    +  last = -1;
    
    2666
    +  FT_PreLine ptr = (*slot)->prelines;
    
    2667
    +
    
    2668
    +  for ( n = 0; n < outline->n_contours; n++ )
    
    2669
    +  {
    
    2670
    +    FT_TRACE5(( "ft_decompose_outline: Contour %d\n", n ));
    
    2671
    +
    
    2672
    +    first = last + 1;
    
    2673
    +    last  = outline->contours[n];
    
    2674
    +    if ( last < first ){
    
    2675
    +      FT_TRACE5(( "Invalid Outline"));
    
    2676
    +      break;
    
    2677
    +    }
    
    2678
    +    limit = outline->points + last;
    
    2679
    +
    
    2680
    +    v_start   = outline->points[first];
    
    2681
    +
    
    2682
    +
    
    2683
    +    v_last   = outline->points[last];
    
    2684
    +
    
    2685
    +    v_control = v_start;
    
    2686
    +
    
    2687
    +    point = outline->points + first;
    
    2688
    +    tags  = outline->tags   + first;
    
    2689
    +    tag   = FT_CURVE_TAG( tags[0] );
    
    2690
    +
    
    2691
    +    /* A contour cannot start with a cubic control point! */
    
    2692
    +    if ( tag == FT_CURVE_TAG_CUBIC )
    
    2693
    +    {
    
    2694
    +      FT_TRACE5(( "Invalid Outline"));
    
    2695
    +      break;
    
    2696
    +    }
    
    2697
    +    /* check first point to determine origin */
    
    2698
    +    if ( tag == FT_CURVE_TAG_CONIC )
    
    2699
    +    {
    
    2700
    +      /* first point is conic control.  Yes, this happens. */
    
    2701
    +      if ( FT_CURVE_TAG( outline->tags[last] ) == FT_CURVE_TAG_ON )
    
    2702
    +      {
    
    2703
    +        /* start at last point if it is on the curve */
    
    2704
    +        v_start = v_last;
    
    2705
    +        limit--;
    
    2706
    +      }
    
    2707
    +      else
    
    2708
    +      {
    
    2709
    +        /* if both first and last points are conic,         */
    
    2710
    +        /* start at their middle and record its position    */
    
    2711
    +        /* for closure                                      */
    
    2712
    +        v_start.x = ( v_start.x + v_last.x ) / 2;
    
    2713
    +        v_start.y = ( v_start.y + v_last.y ) / 2;
    
    2714
    +
    
    2715
    +      /* v_last = v_start; */
    
    2716
    +      }
    
    2717
    +      point--;
    
    2718
    +      tags--;
    
    2719
    +    }
    
    2720
    +    
    
    2721
    +    FT_TRACE5(( "  move to (%.2f, %.2f)\n",
    
    2722
    +                (double)v_start.x / 64, (double)v_start.y / 64 ));
    
    2723
    +
    
    2724
    +
    
    2725
    +    FT_PreLine pl  = malloc(sizeof(FT_PreLineRec));
    
    2726
    +          pl->x1 = v_start.x;
    
    2727
    +          pl->y1 = v_start.y;
    
    2728
    +          pl->x2 = v_start.x;
    
    2729
    +          pl->y2 = v_start.y;
    
    2730
    +          pl->next = NULL;
    
    2731
    +
    
    2732
    +          if ( ( *slot )->prelines == NULL )
    
    2733
    +          {
    
    2734
    +            ptr = ( *slot )->prelines = pl;
    
    2735
    +          }
    
    2736
    +          else
    
    2737
    +          {
    
    2738
    +            ptr->next = pl;
    
    2739
    +            ptr       = ptr->next;
    
    2740
    +          }
    
    2741
    +
    
    2742
    +    while ( point < limit )
    
    2743
    +    {
    
    2744
    +      point++;
    
    2745
    +      tags++;
    
    2746
    +
    
    2747
    +      tag = FT_CURVE_TAG( tags[0] );
    
    2748
    +      switch ( tag )
    
    2749
    +      {
    
    2750
    +      case FT_CURVE_TAG_ON:  /* emit a single line_to */
    
    2751
    +        {
    
    2752
    +          FT_Vector  vec;
    
    2753
    +
    
    2754
    +
    
    2755
    +          vec.x = point->x;
    
    2756
    +          vec.y = point->y;
    
    2757
    +
    
    2758
    +          FT_TRACE5(( "  line to (%.2f, %.2f)\n",
    
    2759
    +                      (double)vec.x / 64, (double)vec.y / 64 ));
    
    2760
    +
    
    2761
    +          FT_PreLine pl3  = malloc(sizeof(FT_PreLineRec));
    
    2762
    +          pl3->x1 = ptr->x2;
    
    2763
    +          pl3->y1 = ptr->y2;
    
    2764
    +          pl3->x2 = vec.x;
    
    2765
    +          pl3->y2 = vec.y;
    
    2766
    +          pl3->next = NULL;
    
    2767
    +          ptr->next = pl3;
    
    2768
    +          ptr = ptr->next;
    
    2769
    +          continue;
    
    2770
    +        }
    
    2771
    +      
    
    2772
    +      case FT_CURVE_TAG_CONIC:  /* consume conic arcs */
    
    2773
    +        v_control.x =  point->x ;
    
    2774
    +        v_control.y = point->y ;
    
    2775
    +
    
    2776
    +      Do_Conic:
    
    2777
    +        if ( point < limit )
    
    2778
    +        {
    
    2779
    +          FT_Vector  vec;
    
    2780
    +          FT_Vector  v_middle;
    
    2781
    +
    
    2782
    +
    
    2783
    +          point++;
    
    2784
    +          tags++;
    
    2785
    +          tag = FT_CURVE_TAG( tags[0] );
    
    2786
    +
    
    2787
    +          vec.x = point->x;
    
    2788
    +          vec.y = point->y;
    
    2789
    +
    
    2790
    +          if ( tag == FT_CURVE_TAG_ON )
    
    2791
    +          {
    
    2792
    +            FT_TRACE5(( "  conic to (%.2f, %.2f)"
    
    2793
    +                        " with control (%.2f, %.2f)\n",
    
    2794
    +                        (double)vec.x / 64,
    
    2795
    +                        (double)vec.y / 64,
    
    2796
    +                        (double)v_control.x / 64,
    
    2797
    +                        (double)v_control.y / 64 ));
    
    2798
    +            FT_Vector vex0 = {ptr->x2, ptr->y2};
    
    2799
    +            error = conic_to2(slot, &v_control, &vex0,&vec , &ptr);
    
    2800
    + 
    
    2801
    +            continue;
    
    2802
    +          }
    
    2803
    +
    
    2804
    +          if ( tag != FT_CURVE_TAG_CONIC )
    
    2805
    +          {
    
    2806
    +            FT_TRACE5( ( "Invalid Outline" ) );
    
    2807
    +            break;
    
    2808
    +          }
    
    2809
    +          v_middle.x = ( v_control.x + vec.x ) / 2;
    
    2810
    +          v_middle.y = ( v_control.y + vec.y ) / 2;
    
    2811
    +
    
    2812
    +          FT_TRACE5(( "  conic to (%.2f, %.2f)"
    
    2813
    +                      " with control (%.2f, %.2f)\n",
    
    2814
    +                      (double)v_middle.x / 64,
    
    2815
    +                      (double)v_middle.y / 64,
    
    2816
    +                      (double)v_control.x / 64,
    
    2817
    +                      (double)v_control.y / 64 ));
    
    2818
    +          FT_Vector vex = {ptr->x2, ptr->y2};
    
    2819
    +          error = conic_to2(slot, &v_control, &vex,&v_middle, &ptr);
    
    2820
    +
    
    2821
    +          v_control = vec;
    
    2822
    +          goto Do_Conic;
    
    2823
    +        }
    
    2824
    +
    
    2825
    +        FT_TRACE5(( "  conic to (%.2f, %.2f)"
    
    2826
    +                    " with control (%.2f, %.2f)\n",
    
    2827
    +                    (double)v_start.x / 64,
    
    2828
    +                    (double)v_start.y / 64,
    
    2829
    +                    (double)v_control.x / 64,
    
    2830
    +                    (double)v_control.y / 64 ));
    
    2831
    +        FT_Vector vex2 = {ptr->x2, ptr->y2};
    
    2832
    +        error = conic_to2( slot, &v_control, &vex2, &v_start, &ptr );
    
    2833
    +      }
    
    2834
    +    }
    
    2835
    +
    
    2836
    +    /* close the contour with a line segment */
    
    2837
    +    FT_TRACE5(( "  line to (%.2f, %.2f)\n",
    
    2838
    +                 (double)v_start.x / 64, (double)v_start.y / 64 ));
    
    2839
    +    FT_PreLine pl2  = malloc(sizeof(FT_PreLineRec));
    
    2840
    +    pl2->x1 = ptr->x2;
    
    2841
    +    pl2->y1 = ptr->y2;
    
    2842
    +    pl2->x2 = v_start.x;
    
    2843
    +    pl2->y2 = v_start.y;
    
    2844
    +    pl2->next = NULL;
    
    2845
    +    ptr->next = pl2;
    
    2846
    +    ptr = ptr->next;
    
    2847
    +    
    
    2848
    +  }
    
    2849
    +
    
    2850
    +  return 0;
    
    2851
    +}
    
    2852
    +
    
    2522 2853
       static FT_Error
    
    2523 2854
       ft_open_face_internal( FT_Library           library,
    
    2524 2855
                              const FT_Open_Args*  args,
    
    ... ... @@ -2748,6 +3079,33 @@
    2748 3079
     
    
    2749 3080
             face->size = size;
    
    2750 3081
           }
    
    3082
    +      if ( args->size > 0 )
    
    3083
    +      {
    
    3084
    +        face->glyph_array = (FT_GlyphSlot*)malloc(
    
    3085
    +            face->driver->clazz->slot_object_size * face->num_glyphs );
    
    3086
    +        error = FT_Set_Pixel_Sizes( face, 0, args->size );
    
    3087
    +
    
    3088
    +        for ( int gindex = 0; gindex < face->num_glyphs; gindex++ )
    
    3089
    +        {
    
    3090
    +          driver                = face->driver;
    
    3091
    +          FT_Driver_Class clazz = driver->clazz;
    
    3092
    +          memory                = driver->root.memory;
    
    3093
    +
    
    3094
    +          FT_ALLOC( face->glyph_array[gindex], clazz->slot_object_size );
    
    3095
    +          
    
    3096
    +          face->glyph_array[gindex]->face         = face;
    
    3097
    +          face->glyph_array[gindex]->prel_shifted = 0;
    
    3098
    +          face->glyph_array[gindex]->glyph_index  = gindex;
    
    3099
    +          ft_glyphslot_init( face->glyph_array[gindex] );
    
    3100
    +
    
    3101
    +          face->glyph_array[gindex]->next = NULL;
    
    3102
    +          *face->glyph                    = *face->glyph_array[gindex];
    
    3103
    +
    
    3104
    +          FT_Load_Glyph( face, gindex, FT_LOAD_NO_HINTING );
    
    3105
    +
    
    3106
    +          ft_decompose_outline( &face->glyph_array[gindex] );
    
    3107
    +        }
    
    3108
    +      }
    
    2751 3109
         }
    
    2752 3110
     
    
    2753 3111
         /* some checks */
    

  • src/dense/ftdense.c
    ... ... @@ -16,15 +16,25 @@
    16 16
         defined( __x86_64__ )                        || \
    
    17 17
         defined( _M_AMD64 )                          || \
    
    18 18
         ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 )
    
    19
    -#  define FT_SSE4_1 1
    
    19
    +  #define FT_SSE4_1 1
    
    20 20
     #else
    
    21
    -#  define FT_SSE4_1 0
    
    21
    +  #define FT_SSE4_1 0
    
    22
    +#endif
    
    23
    +
    
    24
    +#if defined(__ARM_NEON)
    
    25
    +  #define FT_NEON 1
    
    26
    +#else
    
    27
    +  #define FT_NEON 0
    
    22 28
     #endif
    
    23 29
     
    
    24 30
     
    
    25 31
     #if FT_SSE4_1
    
    26 32
     
    
    27
    -    #include <immintrin.h>
    
    33
    +  #include <immintrin.h>
    
    34
    +
    
    35
    +#elif FT_NEON
    
    36
    +
    
    37
    +  #include <arm_neon.h>
    
    28 38
     
    
    29 39
     #endif
    
    30 40
     
    
    ... ... @@ -83,6 +93,165 @@ dense_line_to( const FT_Vector* to, dense_worker* worker )
    83 93
       return 0;
    
    84 94
     }
    
    85 95
     
    
    96
    +void
    
    97
    +dense_render_line2( dense_worker* worker, FT_PreLine pl )
    
    98
    +{
    
    99
    +
    
    100
    +  FT26D6 fx = UPSCALE(pl->x1)>>2;
    
    101
    +  FT26D6 fy = UPSCALE(pl->y1)>>2;
    
    102
    +
    
    103
    +  FT26D6 from_x = fx;
    
    104
    +  FT26D6 from_y = fy;
    
    105
    +
    
    106
    +
    
    107
    +  FT26D6 tx = UPSCALE(pl->x2)>>2;
    
    108
    +  FT26D6 ty = UPSCALE(pl->y2)>>2;
    
    109
    +
    
    110
    +  if ( fy == ty )
    
    111
    +    return;
    
    112
    +
    
    113
    +  FT26D6 to_x = tx;
    
    114
    +  FT26D6 to_y = ty;
    
    115
    +
    
    116
    +  int dir = 1;
    
    117
    +  if ( from_y >= to_y )
    
    118
    +  {
    
    119
    +    dir = -1;
    
    120
    +    FT_SWAP(from_x, to_x);
    
    121
    +    FT_SWAP(from_y, to_y);
    
    122
    +  }
    
    123
    +
    
    124
    +  // Clip to the height.
    
    125
    +  if ( from_y >= worker->m_h<<6 || to_y <= 0 )
    
    126
    +    return;
    
    127
    +
    
    128
    +  FT26D6 deltax,deltay;
    
    129
    +  deltax = to_x - from_x;
    
    130
    +  deltay = to_y - from_y;
    
    131
    +
    
    132
    +    FT_UDIVPREP(from_x != to_x, deltax);
    
    133
    +
    
    134
    +    FT_UDIVPREP(from_y != to_y, deltay);
    
    135
    +
    
    136
    +  if ( from_y < 0 )
    
    137
    +  {
    
    138
    +    from_x -= from_y * deltax/deltay;
    
    139
    +    from_y = 0;
    
    140
    +  }
    
    141
    +
    
    142
    +  if ( to_y > worker->m_h<<6 )
    
    143
    +  {
    
    144
    +    to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay);
    
    145
    +    to_y = worker->m_h<<6;
    
    146
    +  }
    
    147
    +
    
    148
    +
    
    149
    +  if(deltax == 0){
    
    150
    +    FT26D6 x       = from_x;
    
    151
    +    int   x0i    = x>>6;
    
    152
    +    FT26D6 x0floor = x0i<<6;
    
    153
    +
    
    154
    +    // y-coordinate of first pixel of line
    
    155
    +    int    y0      = from_y>>6;
    
    156
    +
    
    157
    +    // y-coordinate of last pixel of line
    
    158
    +    int    y_limit = (to_y + 0x3f)>>6;
    
    159
    +    FT20D12* m_a   = worker->m_a;
    
    160
    +
    
    161
    +
    
    162
    +
    
    163
    +    for ( int y = y0; y < y_limit; y++ )
    
    164
    +    {
    
    165
    +      int linestart = y * worker->m_w;
    
    166
    +
    
    167
    +     FT26D6 dy   = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    168
    +
    
    169
    +      m_a[linestart + x0i] += dir*dy*(64 - x + x0floor);
    
    170
    +      m_a[linestart + ( x0i + 1 )] += dir*dy*(x-x0floor);
    
    171
    +
    
    172
    +    }
    
    173
    +  }
    
    174
    +  else
    
    175
    +  {
    
    176
    +    int    x       = from_x;
    
    177
    +    int    y0      = from_y>>6;
    
    178
    +    int    y_limit = (to_y + 0x3f)>>6;
    
    179
    +
    
    180
    +    FT20D12* m_a     = worker->m_a;
    
    181
    +
    
    182
    +    for ( int y = y0; y < y_limit; y++ )
    
    183
    +    {
    
    184
    +      int   linestart = y * worker->m_w;
    
    185
    +      FT26D6 dy        = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    186
    +      FT26D6 xnext     = x + FT_UDIV((dy*deltax), deltay);
    
    187
    +      FT26D6 d         = dy * dir;
    
    188
    +
    
    189
    +      FT26D6 x0, x1;
    
    190
    +      if ( x < xnext )
    
    191
    +      {
    
    192
    +        x0 = x;
    
    193
    +        x1 = xnext;
    
    194
    +      }
    
    195
    +      else
    
    196
    +      {
    
    197
    +        x0 = xnext;
    
    198
    +        x1 = x;
    
    199
    +      }
    
    200
    +
    
    201
    +
    
    202
    +      int   x0i    = x0>>6;
    
    203
    +      FT26D6 x0floor = x0i<<6;
    
    204
    +
    
    205
    +
    
    206
    +      int   x1i    = (x1+0x3f)>>6;
    
    207
    +      FT26D6 x1ceil =  x1i <<6;
    
    208
    +
    
    209
    +      if ( x1i <= x0i + 1 )
    
    210
    +      {
    
    211
    +        FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
    
    212
    +        m_a[linestart + x0i] += d * ((1<<6) - xmf);
    
    213
    +        m_a[linestart + ( x0i + 1 )] += d * xmf;
    
    214
    +      }
    
    215
    +      else
    
    216
    +      {
    
    217
    +
    
    218
    +        FT26D6 oneOverS = x1 - x0;
    
    219
    +
    
    220
    +        FT_UDIVPREP(x1 != x0, oneOverS);
    
    221
    +
    
    222
    +        FT26D6 x0f = x0 - x0floor;
    
    223
    +
    
    224
    +
    
    225
    +        FT26D6 oneMinusX0f = (1<<6) - x0f;
    
    226
    +        FT26D6 a0 = FT_UDIV(((oneMinusX0f * oneMinusX0f) >> 1), oneOverS);
    
    227
    +        FT26D6 x1f = x1 - x1ceil + (1<<6);
    
    228
    +        FT26D6 am =  FT_UDIV(((x1f * x1f) >> 1) , oneOverS);
    
    229
    +
    
    230
    +        m_a[linestart + x0i] += d * a0;
    
    231
    +
    
    232
    +        if ( x1i == x0i + 2 )
    
    233
    +          m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am );
    
    234
    +        else
    
    235
    +        {
    
    236
    +          FT26D6 a1 =  FT_UDIV((((1<<6) + (1<<5) - x0f) << 6) , oneOverS);
    
    237
    +          m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
    
    238
    +
    
    239
    +          FT26D6 dTimesS =  FT_UDIV((d << 12) , oneOverS);
    
    240
    +
    
    241
    +          for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ )
    
    242
    +            m_a[linestart + xi] += dTimesS;
    
    243
    +
    
    244
    +          FT26D6 a2 = a1 +  FT_UDIV((( x1i - x0i - 3 )<<12),oneOverS);
    
    245
    +          m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am );
    
    246
    +        }
    
    247
    +        m_a[linestart + x1i] += d * am;
    
    248
    +      }
    
    249
    +      x = xnext;
    
    250
    +    }
    
    251
    +  }
    
    252
    +}
    
    253
    +
    
    254
    +
    
    86 255
     void
    
    87 256
     dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    
    88 257
     {
    
    ... ... @@ -154,7 +323,7 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    154 323
         {
    
    155 324
           int linestart = y * worker->m_w;
    
    156 325
     
    
    157
    -     FT26D6 dy   = min( (y + 1)<<6, to_y ) - max( y<<6, from_y );
    
    326
    +     FT26D6 dy   = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    158 327
     
    
    159 328
           m_a[linestart + x0i] += dir*dy*(64 - x + x0floor);
    
    160 329
           m_a[linestart + ( x0i + 1 )] += dir*dy*(x-x0floor);
    
    ... ... @@ -416,10 +585,16 @@ FT_DEFINE_OUTLINE_FUNCS( dense_decompose_funcs,
    416 585
     )
    
    417 586
     
    
    418 587
     static int
    
    419
    -dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
    
    588
    +dense_render_glyph( dense_worker* worker, const FT_Bitmap* target, FT_PreLine pl )
    
    420 589
     {
    
    421
    -  FT_Error error = FT_Outline_Decompose( &( worker->outline ),
    
    422
    -                                         &dense_decompose_funcs, worker );
    
    590
    +  FT_Error error = 0;
    
    591
    +
    
    592
    +  while (pl != NULL)
    
    593
    +  {
    
    594
    +    dense_render_line2(worker, pl);
    
    595
    +    pl = pl->next;
    
    596
    +  }
    
    597
    +
    
    423 598
       // Render into bitmap
    
    424 599
       const FT20D12* source = worker->m_a;
    
    425 600
       unsigned char* dest     = target->buffer;
    
    ... ... @@ -427,8 +602,8 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
    427 602
     
    
    428 603
     #if FT_SSE4_1
    
    429 604
     
    
    430
    -__m128i offset = _mm_setzero_si128();
    
    431
    -  __m128i mask   = _mm_set1_epi32( 0x0c080400 );
    
    605
    +  __m128i offset = _mm_setzero_si128();
    
    606
    +  __m128i nzero = _mm_castps_si128(_mm_set1_ps(-0.0));
    
    432 607
     
    
    433 608
       for (int i = 0; i < worker->m_h*worker->m_w; i += 4)
    
    434 609
       {
    
    ... ... @@ -438,34 +613,45 @@ __m128i offset = _mm_setzero_si128();
    438 613
     
    
    439 614
         x = _mm_add_epi32( x, _mm_slli_si128( x, 4 ) );
    
    440 615
     
    
    441
    -    x = _mm_add_epi32(
    
    442
    -        x, _mm_castps_si128( _mm_shuffle_ps( _mm_setzero_ps(),
    
    443
    -                                             _mm_castsi128_ps( x ), 0x40 ) ) );
    
    616
    +    x = _mm_add_epi32( x, _mm_slli_si128( x, 8 ) );
    
    444 617
     
    
    445
    -    // add the prefsum of previous 4 floats to all current floats
    
    618
    +    // add the prefix sum of previous 4 ints to all ints
    
    446 619
         x = _mm_add_epi32( x, offset );
    
    447 620
     
    
    448 621
         // take absolute value
    
    449
    -    __m128i y = _mm_abs_epi32( x );  // fabs(x)
    
    622
    +    __m128i y = _mm_srli_epi32( _mm_abs_epi32( x) , 4 );
    
    623
    +    y = _mm_packus_epi16(_mm_packs_epi32(y, nzero), nzero);
    
    624
    +    _mm_storeu_si32(&dest[i], y);
    
    625
    +
    
    626
    +    // store the current prefix sum in offset
    
    627
    +    offset = _mm_shuffle_epi32(x,_MM_SHUFFLE( 3, 3, 3, 3 ) );
    
    628
    +  }
    
    629
    +#elif FT_NEON
    
    630
    +  int32x4_t offset = vdupq_n_s32(0);
    
    631
    +  int32x4_t nzero =  vreinterpretq_s32_f32(vdupq_n_f32(-0.0));
    
    450 632
     
    
    451
    -    // cap max value to 1
    
    452
    -    y = _mm_min_epi32( y, _mm_set1_epi32( 4080 ) );
    
    633
    +  for (int i = 0; i < worker->m_h*worker->m_w; i += 4)
    
    634
    +  {
    
    635
    +    // load 4 ints from source
    
    453 636
     
    
    454
    -    // reduce to 255
    
    455
    -    y = _mm_srli_epi32( y, 4 );
    
    637
    +    int32x4_t x = vld1q_s32( (int32_t*)&source[i] );
    
    456 638
     
    
    457
    -    // shuffle
    
    458
    -    y = _mm_shuffle_epi8( y, mask );
    
    639
    +    x = vaddq_s32( x, vreinterpretq_s32_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_s32( x), 12) ));
    
    459 640
     
    
    460
    -    _mm_store_ss( (float*)&dest[i], (__m128)y );
    
    641
    +    x = vaddq_s32(x, vreinterpretq_s32_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_s32(x), 8)));
    
    461 642
     
    
    462
    -    // store the current prefix sum in offset
    
    463
    -    offset = _mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( x ),
    
    464
    -                                               _mm_castsi128_ps( x ),
    
    465
    -                                               _MM_SHUFFLE( 3, 3, 3, 3 ) ) );
    
    466
    -  }
    
    643
    +    // add the prefix sum of previous 4 ints to all ints
    
    644
    +    x = vaddq_s32( x, offset );
    
    645
    +
    
    646
    +    int32x4_t y = vshrq_n_s32( vabsq_s32( x) , 4 );
    
    647
    +    y = vreinterpretq_s32_s16(vcombine_s16(vqmovn_s32(y), vqmovn_s32(nzero)));
    
    648
    +    y = vreinterpretq_s32_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_s32(y)), vqmovun_s16(vreinterpretq_s16_s32(nzero))));
    
    649
    +
    
    650
    +    vst1q_s32(&dest[i], y);
    
    467 651
     
    
    468
    -#else /* FT_SSE4_1 */
    
    652
    +    offset = vdupq_laneq_s32(x,3 );
    
    653
    +  }
    
    654
    +#else
    
    469 655
     
    
    470 656
       FT20D12 value = 0;
    
    471 657
     
    
    ... ... @@ -484,7 +670,7 @@ __m128i offset = _mm_setzero_si128();
    484 670
         dest++;
    
    485 671
       }
    
    486 672
     
    
    487
    -#endif /* FT_SSE4_1 */
    
    673
    +#endif /* FT_SSE4_1 || FT_NEON */
    
    488 674
     
    
    489 675
       free(worker->m_a);
    
    490 676
       return error;
    
    ... ... @@ -495,6 +681,7 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    495 681
     {
    
    496 682
       const FT_Outline* outline    = (const FT_Outline*)params->source;
    
    497 683
       FT_Bitmap*  target_map = params->target;
    
    684
    +  FT_PreLine pl = params->prelines;
    
    498 685
     
    
    499 686
       dense_worker worker[1];
    
    500 687
     
    
    ... ... @@ -521,7 +708,7 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    521 708
       worker->m_w = target_map->pitch;
    
    522 709
       worker->m_h = target_map->rows;
    
    523 710
     
    
    524
    -  int size = worker->m_w * worker->m_h + 4;
    
    711
    +  int size = (worker->m_w * worker->m_h + 3) & ~3;
    
    525 712
     
    
    526 713
       worker->m_a      = malloc( sizeof( FT20D12 ) * size );
    
    527 714
       worker->m_a_size = size;
    
    ... ... @@ -536,7 +723,7 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    536 723
       // Invert the pitch to account for different +ve y-axis direction in dense array
    
    537 724
       // (maybe temporary solution)
    
    538 725
       target_map->pitch *= -1;
    
    539
    -  return dense_render_glyph( worker, target_map );
    
    726
    +  return dense_render_glyph( worker, target_map, pl );
    
    540 727
     }
    
    541 728
     
    
    542 729
     FT_DEFINE_RASTER_FUNCS(
    

  • src/dense/ftdenserend.c
    ... ... @@ -139,7 +139,8 @@
    139 139
     
    
    140 140
     
    
    141 141
         /* allocate new one */
    
    142
    -    if ( FT_ALLOC_MULT( bitmap->buffer, bitmap->rows, bitmap->pitch ) )
    
    142
    +    // ARM NEON crashes without the 16 extra bytes: each NEON store writes a full 16-byte vector
    
    143
    +    if ( FT_ALLOC_MULT( bitmap->buffer, 1,bitmap->rows*bitmap->pitch + 16 ) )
    
    143 144
           goto Exit;
    
    144 145
     
    
    145 146
         slot->internal->flags |= FT_GLYPH_OWN_BITMAP;
    
    ... ... @@ -161,12 +162,25 @@
    161 162
         }
    
    162 163
     
    
    163 164
         /* translate outline to render it into the bitmap */
    
    164
    -    if ( x_shift || y_shift )
    
    165
    -      FT_Outline_Translate( outline, x_shift, y_shift );
    
    165
    +    if ( (x_shift || y_shift) && !slot->prel_shifted){
    
    166
    +      //FT_Outline_Translate( outline, x_shift, y_shift );
    
    167
    +      FT_PreLine pl = slot->prelines;
    
    168
    +      while (pl!=NULL)
    
    169
    +      {
    
    170
    +        pl->x1 += x_shift;
    
    171
    +        pl->y1 += y_shift;
    
    172
    +        pl->x2 += x_shift;
    
    173
    +        pl->y2 += y_shift;
    
    174
    +
    
    175
    +        pl = pl->next;
    
    176
    +      }
    
    177
    +      slot->prel_shifted = 1;
    
    178
    +    }
    
    166 179
     
    
    167 180
         /* set up parameters */
    
    168 181
         params.target = bitmap;
    
    169 182
         params.source = outline;
    
    183
    +    params.prelines = slot->prelines;
    
    170 184
     
    
    171 185
         /* render the outline */
    
    172 186
         error =
    
    ... ... @@ -184,8 +198,8 @@
    184 198
           slot->internal->flags &= ~FT_GLYPH_OWN_BITMAP;
    
    185 199
         }
    
    186 200
     
    
    187
    -    if ( x_shift || y_shift )
    
    188
    -      FT_Outline_Translate( outline, -x_shift, -y_shift );
    
    201
    +    // if ( x_shift || y_shift )
    
    202
    +    //   FT_Outline_Translate( outline, -x_shift, -y_shift );
    
    189 203
     
    
    190 204
         return error;
    
    191 205
       }
    

  • src/dense/rules.mk
    ... ... @@ -24,7 +24,7 @@ DENSE_COMPILE := $(CC) $(ANSIFLAGS) \
    24 24
                             $I$(subst /,$(COMPILER_SEP),$(DENSE_DIR)) \
    
    25 25
                             $(INCLUDE_FLAGS)                          \
    
    26 26
                             $(FT_CFLAGS)                              \
    
    27
    -                        "-msse4.1"
    
    27
    +                        "-march=native"
    
    28 28
     
    
    29 29
     # DENSE driver sources (i.e., C files)
    
    30 30
     #
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]