freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][master] [smooth] Faster bitmap sweeping.


From: Alexei Podtelezhnikov (@apodtele)
Subject: [Git][freetype/freetype][master] [smooth] Faster bitmap sweeping.
Date: Wed, 12 May 2021 04:06:02 +0000

Alexei Podtelezhnikov pushed to branch master at FreeType / FreeType

Commits:

2 changed files:

Changes:

  • ChangeLog
    1
    +2021-05-07  Alexei Podtelezhnikov  <apodtele@gmail.com>
    
    2
    +
    
    3
    +	[smooth] Faster bitmap sweeping.
    
    4
    +
    
    5
    +	Selecting the fill rule or checking the direct mode each time we call
    
    6
    +	`gray_hline' is sub-optimal.  This effectively splits the direct mode
    
    7
    +	into a separate code path while inlining `gray_hline' and saving 5-7%
    
    8
    +	of rendering time.
    
    9
    +
    
    10
    +	* src/smooth/ftgrays.c (gray_hline): Eliminated in favor of...
    
    11
    +	(FT_FILL_RULE, FT_GRAY_SET): ... these new macros...
    
    12
    +	(gray_sweep): ... inlined here.
    
    13
    +	(gray_sweep_direct): New function that handles the direct span buffer.
    
    14
    +	(gray_TWorker): Remove the span buffer.
    
    15
    +	(gray_raster_render, gray_convert_glyph): Updated.
    
    16
    +
    
    1 17
     2021-05-10  Alexei Podtelezhnikov  <apodtele@gmail.com>
    
    2 18
     
    
    3 19
     	* src/smooth/ftgrays.c (gray_hline): Simplify even-odd computations.
    

  • src/smooth/ftgrays.c
    ... ... @@ -392,6 +392,42 @@ typedef ptrdiff_t FT_PtrDist;
    392 392
                 ( sizeof( long ) * FT_CHAR_BIT - PIXEL_BITS ) )
    
    393 393
     
    
    394 394
     
    
    395
    +  /* Scale area and apply fill rule to calculate the coverage byte. */
    
    396
    +  /* The top fill bit is used for the non-zero rule. The eighth     */
    
    397
    +  /* fill bit is used for the even-odd rule.  The higher coverage   */
    
    398
    +  /* bytes are either clamped for the non-zero rule or discarded    */
    
    399
    +  /* later for the even-odd rule.                                   */
    
    400
    +#define FT_FILL_RULE( coverage, area, fill )                \
    
    401
    +  FT_BEGIN_STMNT                                            \
    
    402
    +    coverage = (int)( area >> ( PIXEL_BITS * 2 + 1 - 8 ) ); \
    
    403
    +    if ( coverage & fill )                                  \
    
    404
    +      coverage = ~coverage;                                 \
    
    405
    +    if ( coverage > 255 && fill & INT_MIN )                 \
    
    406
    +      coverage = 255;                                       \
    
    407
    +  FT_END_STMNT
    
    408
    +
    
    409
    +
    
    410
    +  /* It is faster to write small spans byte-by-byte than calling     */
    
    411
    +  /* `memset'.  This is mainly due to the cost of the function call. */
    
    412
    +#define FT_GRAY_SET( d, s, count )           \
    
    413
    +  FT_BEGIN_STMNT                             \
    
    414
    +    unsigned char* q = d;                    \
    
    415
    +    unsigned char  c = (unsigned char)s;     \
    
    416
    +    switch ( count )                         \
    
    417
    +    {                                        \
    
    418
    +      case 7: *q++ = c; /* fall through */   \
    
    419
    +      case 6: *q++ = c; /* fall through */   \
    
    420
    +      case 5: *q++ = c; /* fall through */   \
    
    421
    +      case 4: *q++ = c; /* fall through */   \
    
    422
    +      case 3: *q++ = c; /* fall through */   \
    
    423
    +      case 2: *q++ = c; /* fall through */   \
    
    424
    +      case 1: *q   = c; /* fall through */   \
    
    425
    +      case 0: break;                         \
    
    426
    +      default: FT_MEM_SET( d, s, count );    \
    
    427
    +    }                                        \
    
    428
    +  FT_END_STMNT
    
    429
    +
    
    430
    +
    
    395 431
       /**************************************************************************
    
    396 432
        *
    
    397 433
        * TYPE DEFINITIONS
    
    ... ... @@ -463,8 +499,6 @@ typedef ptrdiff_t FT_PtrDist;
    463 499
     
    
    464 500
         FT_Raster_Span_Func  render_span;
    
    465 501
         void*                render_span_data;
    
    466
    -    FT_Span              spans[FT_MAX_GRAY_SPANS];
    
    467
    -    int                  num_spans;
    
    468 502
     
    
    469 503
       } gray_TWorker, *gray_PWorker;
    
    470 504
     
    
    ... ... @@ -1171,94 +1205,62 @@ typedef ptrdiff_t FT_PtrDist;
    1171 1205
     
    
    1172 1206
     
    
    1173 1207
       static void
    
    1174
    -  gray_hline( RAS_ARG_ TCoord  x,
    
    1175
    -                       TCoord  y,
    
    1176
    -                       TArea   coverage,
    
    1177
    -                       TCoord  acount )
    
    1208
    +  gray_sweep( RAS_ARG )
    
    1178 1209
       {
    
    1179
    -    /* scale the coverage from 0..(ONE_PIXEL*ONE_PIXEL*2) to 0..256  */
    
    1180
    -    coverage >>= PIXEL_BITS * 2 + 1 - 8;
    
    1210
    +    int  fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN;
    
    1211
    +    int  coverage;
    
    1212
    +    int  y;
    
    1181 1213
     
    
    1182
    -    /* compute the line's coverage depending on the outline fill rule */
    
    1183
    -    if ( ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL )
    
    1184
    -    {
    
    1185
    -      if ( coverage & 256 )  /* odd bit */
    
    1186
    -        coverage = ~coverage;
    
    1187 1214
     
    
    1188
    -      /* higher bits discarded below */
    
    1189
    -    }
    
    1190
    -    else  /* default non-zero winding rule */
    
    1215
    +    for ( y = ras.min_ey; y < ras.max_ey; y++ )
    
    1191 1216
         {
    
    1192
    -      if ( coverage < 0 )
    
    1193
    -        coverage = ~coverage;  /* the same as -coverage - 1 */
    
    1217
    +      PCell   cell  = ras.ycells[y - ras.min_ey];
    
    1218
    +      TCoord  x     = ras.min_ex;
    
    1219
    +      TArea   cover = 0;
    
    1220
    +      TArea   area;
    
    1194 1221
     
    
    1195
    -      if ( coverage >= 256 )
    
    1196
    -        coverage = 255;
    
    1197
    -    }
    
    1222
    +      unsigned char*  line = ras.target.origin - ras.target.pitch * y;
    
    1198 1223
     
    
    1199
    -    if ( ras.num_spans >= 0 )  /* for FT_RASTER_FLAG_DIRECT only */
    
    1200
    -    {
    
    1201
    -      FT_Span*  span = ras.spans + ras.num_spans++;
    
    1202 1224
     
    
    1225
    +      for ( ; cell != NULL; cell = cell->next )
    
    1226
    +      {
    
    1227
    +        if ( cover != 0 && cell->x > x )
    
    1228
    +        {
    
    1229
    +          FT_FILL_RULE( coverage, cover, fill );
    
    1230
    +          FT_GRAY_SET( line + x, coverage, cell->x - x );
    
    1231
    +        }
    
    1203 1232
     
    
    1204
    -      span->x        = (short)x;
    
    1205
    -      span->len      = (unsigned short)acount;
    
    1206
    -      span->coverage = (unsigned char)coverage;
    
    1233
    +        cover += (TArea)cell->cover * ( ONE_PIXEL * 2 );
    
    1234
    +        area   = cover - cell->area;
    
    1207 1235
     
    
    1208
    -      if ( ras.num_spans == FT_MAX_GRAY_SPANS )
    
    1209
    -      {
    
    1210
    -        /* flush the span buffer and reset the count */
    
    1211
    -        ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data );
    
    1212
    -        ras.num_spans = 0;
    
    1213
    -      }
    
    1214
    -    }
    
    1215
    -    else
    
    1216
    -    {
    
    1217
    -      unsigned char*  q = ras.target.origin - ras.target.pitch * y + x;
    
    1218
    -      unsigned char   c = (unsigned char)coverage;
    
    1236
    +        if ( area != 0 && cell->x >= ras.min_ex )
    
    1237
    +        {
    
    1238
    +          FT_FILL_RULE( coverage, area, fill );
    
    1239
    +          line[cell->x] = (unsigned char)coverage;
    
    1240
    +        }
    
    1219 1241
     
    
    1242
    +        x = cell->x + 1;
    
    1243
    +      }
    
    1220 1244
     
    
    1221
    -      /* For small-spans it is faster to do it by ourselves than
    
    1222
    -       * calling `memset'.  This is mainly due to the cost of the
    
    1223
    -       * function call.
    
    1224
    -       */
    
    1225
    -      switch ( acount )
    
    1245
    +      if ( cover != 0 )  /* only if cropped */
    
    1226 1246
           {
    
    1227
    -      case 7:
    
    1228
    -        *q++ = c;
    
    1229
    -        /* fall through */
    
    1230
    -      case 6:
    
    1231
    -        *q++ = c;
    
    1232
    -        /* fall through */
    
    1233
    -      case 5:
    
    1234
    -        *q++ = c;
    
    1235
    -        /* fall through */
    
    1236
    -      case 4:
    
    1237
    -        *q++ = c;
    
    1238
    -        /* fall through */
    
    1239
    -      case 3:
    
    1240
    -        *q++ = c;
    
    1241
    -        /* fall through */
    
    1242
    -      case 2:
    
    1243
    -        *q++ = c;
    
    1244
    -        /* fall through */
    
    1245
    -      case 1:
    
    1246
    -        *q = c;
    
    1247
    -        /* fall through */
    
    1248
    -      case 0:
    
    1249
    -        break;
    
    1250
    -      default:
    
    1251
    -        FT_MEM_SET( q, c, acount );
    
    1247
    +        FT_FILL_RULE( coverage, cover, fill );
    
    1248
    +        FT_GRAY_SET( line + x, coverage, ras.max_ex - x );
    
    1252 1249
           }
    
    1253 1250
         }
    
    1254 1251
       }
    
    1255 1252
     
    
    1256 1253
     
    
    1257 1254
       static void
    
    1258
    -  gray_sweep( RAS_ARG )
    
    1255
    +  gray_sweep_direct( RAS_ARG )
    
    1259 1256
       {
    
    1257
    +    int  fill = ras.outline.flags & FT_OUTLINE_EVEN_ODD_FILL ? 0x100 : INT_MIN;
    
    1258
    +    int  coverage;
    
    1260 1259
         int  y;
    
    1261 1260
     
    
    1261
    +    FT_Span  span[FT_MAX_GRAY_SPANS];
    
    1262
    +    int      n = 0;
    
    1263
    +
    
    1262 1264
     
    
    1263 1265
         for ( y = ras.min_ey; y < ras.max_ey; y++ )
    
    1264 1266
         {
    
    ... ... @@ -1271,25 +1273,59 @@ typedef ptrdiff_t FT_PtrDist;
    1271 1273
           for ( ; cell != NULL; cell = cell->next )
    
    1272 1274
           {
    
    1273 1275
             if ( cover != 0 && cell->x > x )
    
    1274
    -          gray_hline( RAS_VAR_ x, y, cover, cell->x - x );
    
    1276
    +        {
    
    1277
    +          FT_FILL_RULE( coverage, cover, fill );
    
    1278
    +
    
    1279
    +          span[n].coverage = (unsigned char)coverage;
    
    1280
    +          span[n].x        = (short)x;
    
    1281
    +          span[n].len      = (unsigned short)( cell->x - x );
    
    1282
    +
    
    1283
    +          if ( ++n == FT_MAX_GRAY_SPANS )
    
    1284
    +          {
    
    1285
    +            /* flush the span buffer and reset the count */
    
    1286
    +            ras.render_span( y, n, span, ras.render_span_data );
    
    1287
    +            n = 0;
    
    1288
    +          }
    
    1289
    +        }
    
    1275 1290
     
    
    1276 1291
             cover += (TArea)cell->cover * ( ONE_PIXEL * 2 );
    
    1277 1292
             area   = cover - cell->area;
    
    1278 1293
     
    
    1279 1294
             if ( area != 0 && cell->x >= ras.min_ex )
    
    1280
    -          gray_hline( RAS_VAR_ cell->x, y, area, 1 );
    
    1295
    +        {
    
    1296
    +          FT_FILL_RULE( coverage, area, fill );
    
    1297
    +
    
    1298
    +          span[n].coverage = (unsigned char)coverage;
    
    1299
    +          span[n].x        = (short)cell->x;
    
    1300
    +          span[n].len      = 1;
    
    1301
    +
    
    1302
    +          if ( ++n == FT_MAX_GRAY_SPANS )
    
    1303
    +          {
    
    1304
    +            /* flush the span buffer and reset the count */
    
    1305
    +            ras.render_span( y, n, span, ras.render_span_data );
    
    1306
    +            n = 0;
    
    1307
    +          }
    
    1308
    +        }
    
    1281 1309
     
    
    1282 1310
             x = cell->x + 1;
    
    1283 1311
           }
    
    1284 1312
     
    
    1285
    -      if ( cover != 0 )
    
    1286
    -        gray_hline( RAS_VAR_ x, y, cover, ras.max_ex - x );
    
    1313
    +      if ( cover != 0 )  /* only if cropped */
    
    1314
    +      {
    
    1315
    +        FT_FILL_RULE( coverage, cover, fill );
    
    1316
    +
    
    1317
    +        span[n].coverage = (unsigned char)coverage;
    
    1318
    +        span[n].x        = (short)x;
    
    1319
    +        span[n].len      = (unsigned short)( ras.max_ex - x );
    
    1320
    +
    
    1321
    +        ++n;
    
    1322
    +      }
    
    1287 1323
     
    
    1288
    -      if ( ras.num_spans > 0 )  /* for FT_RASTER_FLAG_DIRECT only */
    
    1324
    +      if ( n )
    
    1289 1325
           {
    
    1290 1326
             /* flush the span buffer and reset the count */
    
    1291
    -        ras.render_span( y, ras.num_spans, ras.spans, ras.render_span_data );
    
    1292
    -        ras.num_spans = 0;
    
    1327
    +        ras.render_span( y, n, span, ras.render_span_data );
    
    1328
    +        n = 0;
    
    1293 1329
           }
    
    1294 1330
         }
    
    1295 1331
       }
    
    ... ... @@ -1688,7 +1724,10 @@ typedef ptrdiff_t FT_PtrDist;
    1688 1724
     
    
    1689 1725
             if ( !error )
    
    1690 1726
             {
    
    1691
    -          gray_sweep( RAS_VAR );
    
    1727
    +          if ( ras.render_span )  /* for FT_RASTER_FLAG_DIRECT only */
    
    1728
    +            gray_sweep_direct( RAS_VAR );
    
    1729
    +          else
    
    1730
    +            gray_sweep( RAS_VAR );
    
    1692 1731
               band--;
    
    1693 1732
               continue;
    
    1694 1733
             }
    
    ... ... @@ -1757,7 +1796,6 @@ typedef ptrdiff_t FT_PtrDist;
    1757 1796
     
    
    1758 1797
           ras.render_span      = (FT_Raster_Span_Func)params->gray_spans;
    
    1759 1798
           ras.render_span_data = params->user;
    
    1760
    -      ras.num_spans        = 0;
    
    1761 1799
     
    
    1762 1800
           ras.min_ex = params->clip_box.xMin;
    
    1763 1801
           ras.min_ey = params->clip_box.yMin;
    
    ... ... @@ -1787,7 +1825,6 @@ typedef ptrdiff_t FT_PtrDist;
    1787 1825
     
    
    1788 1826
           ras.render_span      = (FT_Raster_Span_Func)NULL;
    
    1789 1827
           ras.render_span_data = NULL;
    
    1790
    -      ras.num_spans        = -1;  /* invalid */
    
    1791 1828
     
    
    1792 1829
           ras.min_ex = 0;
    
    1793 1830
           ras.min_ey = 0;
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]