... |
... |
@@ -58,6 +58,22 @@ dense_line_to( const FT_Vector* to, dense_worker* worker ) |
58
|
58
|
dense_move_to( to, worker );
|
59
|
59
|
return 0;
|
60
|
60
|
}
|
|
61
|
+
|
|
62
|
+FT26D6 max(FT26D6 x, FT26D6 y){
|
|
63
|
+ if(x > y){
|
|
64
|
+ return x;
|
|
65
|
+ }
|
|
66
|
+
|
|
67
|
+ return y;
|
|
68
|
+}
|
|
69
|
+
|
|
70
|
+FT26D6 min(FT26D6 x, FT26D6 y){
|
|
71
|
+ if(x < y){
|
|
72
|
+ return x;
|
|
73
|
+ }
|
|
74
|
+
|
|
75
|
+ return y;
|
|
76
|
+}
|
61
|
77
|
void
|
62
|
78
|
swap( long int* a, long int* b )
|
63
|
79
|
{
|
... |
... |
@@ -72,69 +88,81 @@ dense_render_line( dense_worker* worker, TPos tox, TPos toy ) |
72
|
88
|
{
|
73
|
89
|
// printf("line from: %d, %d to %d, %d\n", worker->prev_x, worker->prev_y,
|
74
|
90
|
// to_x, to_y);
|
75
|
|
- float from_x = worker->prev_x;
|
76
|
|
- float from_y = worker->prev_y;
|
|
91
|
+ FT26D6 from_x = worker->prev_x;
|
|
92
|
+ FT26D6 from_y = worker->prev_y;
|
|
93
|
+
|
77
|
94
|
if ( from_y == toy )
|
78
|
95
|
return;
|
79
|
96
|
|
80
|
|
- // aP0.m_x -= worker->m_origin_x;
|
81
|
|
- // aP0.m_y -= worker->m_origin_y;
|
82
|
|
- // aP1.m_x -= worker->m_origin_x;
|
83
|
|
- // aP1.m_y -= worker->m_origin_y;
|
84
|
|
-
|
85
|
|
- // from_x = TRUNC( (int)from_x );
|
86
|
|
- // from_y = TRUNC( (int)from_y );
|
87
|
|
- // to_x = TRUNC( (int)to_x );
|
88
|
|
- // to_y = TRUNC( (int)to_y );
|
89
|
97
|
|
90
|
|
- from_x /= 256.0;
|
91
|
|
- from_y /= 256.0;
|
92
|
|
- float to_x = tox / 256.0;
|
93
|
|
- float to_y = toy / 256.0;
|
|
98
|
+ from_x >>= 2;
|
|
99
|
+ from_y >>= 2;
|
|
100
|
+ FT26D6 to_x = tox >> 2;
|
|
101
|
+ FT26D6 to_y = toy >> 2;
|
94
|
102
|
|
95
|
103
|
|
96
|
|
- //printf("line from: %f, %f to %f, %f\n", from_x, from_y, to_x, to_y);
|
|
104
|
+ //printf("line from: %d, %d to %d, %d\n", from_x, from_y, to_x, to_y);
|
97
|
105
|
|
98
|
|
- float dir;
|
99
|
|
- if ( from_y < to_y )
|
100
|
|
- dir = 1;
|
101
|
|
- else
|
102
|
|
- {
|
103
|
|
- dir = -1;
|
|
106
|
+ FT26D6 dir = 1;
|
|
107
|
+ if ( from_y >= to_y ){
|
|
108
|
+ dir = -dir;
|
104
|
109
|
swap( &from_x, &to_x );
|
105
|
110
|
swap( &from_y, &to_y );
|
106
|
111
|
}
|
107
|
112
|
|
|
113
|
+
|
108
|
114
|
// Clip to the height.
|
109
|
|
- if ( from_y >= worker->m_h || to_y <= 0 )
|
|
115
|
+ if ( (from_y>>6) >= worker->m_h || to_y <= 0 )
|
110
|
116
|
return;
|
111
|
117
|
|
112
|
|
- float dxdy = ( to_x - from_x ) / (float)( to_y - from_y );
|
|
118
|
+ //float dxdy = ( to_x - from_x ) / (float)( to_y - from_y );
|
|
119
|
+ FT26D6 deltax = to_x - from_x;
|
|
120
|
+ FT26D6 deltay = to_y - from_y;
|
|
121
|
+
|
113
|
122
|
if ( from_y < 0 )
|
114
|
123
|
{
|
115
|
|
- from_x -= from_y * dxdy;
|
|
124
|
+ from_x -= from_y * deltax/deltay;
|
116
|
125
|
from_y = 0;
|
117
|
126
|
}
|
118
|
|
- if ( to_y > worker->m_h )
|
|
127
|
+
|
|
128
|
+ if ( (to_y>>6) > worker->m_h )
|
119
|
129
|
{
|
120
|
|
- to_x -= ( to_y - worker->m_h ) * dxdy;
|
121
|
|
- to_y = (float)worker->m_h;
|
|
130
|
+ to_x -= ( to_y - (worker->m_h<<6) ) * deltax/deltay;
|
|
131
|
+ to_y = worker->m_h<<6;
|
|
132
|
+ }
|
|
133
|
+
|
|
134
|
+ //float x = from_x;
|
|
135
|
+ FT26D6 x = from_x;
|
|
136
|
+
|
|
137
|
+ // int y0 = (int)from_y;
|
|
138
|
+ // int y_limit = (int)ceil( to_y );
|
|
139
|
+ int y0 = from_y>>6;
|
|
140
|
+ int y_limit = ( to_y + 0x3f)>>6;
|
|
141
|
+
|
|
142
|
+ // printf("y0, ylimit: %d, %d\n", y0, y_limit);
|
|
143
|
+
|
|
144
|
+ if(y_limit > worker->m_h){
|
|
145
|
+ y_limit = worker->m_h;
|
|
146
|
+
|
122
|
147
|
}
|
123
|
148
|
|
124
|
|
- float x = from_x;
|
125
|
|
- int y0 = (int)from_y;
|
126
|
|
- int y_limit = (int)ceil( to_y );
|
127
|
|
- float* m_a = worker->m_a;
|
|
149
|
+ FT20D12* m_a = worker->m_a;
|
128
|
150
|
|
129
|
151
|
for ( int y = y0; y < y_limit; y++ )
|
130
|
152
|
{
|
131
|
153
|
// printf("y is %d\n", y);
|
132
|
154
|
int linestart = y * worker->m_w;
|
133
|
|
- float dy = fmin( y + 1.0f, to_y ) - fmax( (float)y, from_y );
|
134
|
|
- float xnext = x + dxdy * dy;
|
135
|
|
- float d = dy * dir;
|
|
155
|
+ //float dy = fmin( y + 1.0f, to_y ) - fmax( (float)y, from_y );
|
|
156
|
+ FT26D6 dy = min((y+1)<<6, to_y) - max(y<<6, from_y);
|
|
157
|
+
|
|
158
|
+ printf("dy: %f\n", (float)dy/(1<<6));
|
|
159
|
+ //float xnext = x + dxdy * dy;
|
|
160
|
+ FT26D6 xnext = x + dy*deltax/deltay;
|
136
|
161
|
|
137
|
|
- float x0, x1;
|
|
162
|
+ //float d = dy * dir;
|
|
163
|
+ int d = dy*dir;
|
|
164
|
+
|
|
165
|
+ FT26D6 x0, x1;
|
138
|
166
|
if ( x < xnext )
|
139
|
167
|
{
|
140
|
168
|
x0 = x;
|
... |
... |
@@ -151,36 +179,64 @@ dense_render_line( dense_worker* worker, TPos tox, TPos toy ) |
151
|
179
|
floating-point inaccuracy That would cause an out-of-bounds array access at
|
152
|
180
|
index -1.
|
153
|
181
|
*/
|
154
|
|
- float x0floor = x0 <= 0.0f ? 0.0f : (float)floor( x0 );
|
|
182
|
+ //float x0floor = x0 <= 0.0f ? 0.0f : (float)floor( x0 );
|
|
183
|
+
|
|
184
|
+ //int x0i = (int)x0floor;
|
|
185
|
+ int x0i = x0 >>6;
|
155
|
186
|
|
156
|
|
- int x0i = (int)x0floor;
|
157
|
|
- float x1ceil = (float)ceil( x1 );
|
158
|
|
- int x1i = (int)x1ceil;
|
|
187
|
+ //float x1ceil = (float)ceil( x1 );
|
|
188
|
+ FT26D6 x0floor = x0i<<6;
|
|
189
|
+
|
|
190
|
+ //int x1i = (int)x1ceil;
|
|
191
|
+ int x1i = (x1+0x3f)>>6;
|
|
192
|
+
|
|
193
|
+ FT26D6 x1ceil = x1i<<6;
|
159
|
194
|
if ( x1i <= x0i + 1 )
|
160
|
195
|
{
|
161
|
|
- float xmf = 0.5f * ( x + xnext ) - x0floor;
|
162
|
|
- m_a[linestart + x0i] += d - d * xmf;
|
|
196
|
+ //float xmf = 0.5f * ( x + xnext ) - x0floor;
|
|
197
|
+ FT26D6 xmf = ( x + xnext )>>1 - x0floor;
|
|
198
|
+ // m_a[linestart + x0i] += d - d * xmf;
|
|
199
|
+ m_a[linestart + x0i] += d * (1<<6 - xmf);
|
|
200
|
+ // m_a[linestart + ( x0i + 1 )] += d * xmf;
|
163
|
201
|
m_a[linestart + ( x0i + 1 )] += d * xmf;
|
164
|
202
|
}
|
165
|
203
|
else
|
166
|
204
|
{
|
167
|
|
- float s = 1.0f / ( x1 - x0 );
|
168
|
|
- float x0f = x0 - x0floor;
|
169
|
|
- float a0 = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f );
|
170
|
|
- float x1f = x1 - x1ceil + 1.0f;
|
171
|
|
- float am = 0.5f * s * x1f * x1f;
|
|
205
|
+ //float s = 1.0f / ( x1 - x0 );
|
|
206
|
+ FT26D6 oneOverS = x1 - x0;
|
|
207
|
+ //float x0f = x0 - x0floor;
|
|
208
|
+ FT26D6 x0f = x0 - x0floor;
|
|
209
|
+ FT26D6 oneMinusX0 = 1<<6 - x0f;
|
|
210
|
+
|
|
211
|
+ //float a0 = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f );
|
|
212
|
+ int a0 = ((oneMinusX0 * oneMinusX0) >> 1) / oneOverS;
|
|
213
|
+
|
|
214
|
+ //float x1f = x1 - x1ceil + 1.0f;
|
|
215
|
+ FT26D6 x1f = x1 - x1ceil + 1<<6;
|
|
216
|
+
|
|
217
|
+ //float am = 0.5f * s * x1f * x1f;
|
|
218
|
+ int am = ((x1f * x1f) >> 1) / oneOverS;
|
|
219
|
+
|
172
|
220
|
m_a[linestart + x0i] += d * a0;
|
|
221
|
+
|
173
|
222
|
if ( x1i == x0i + 2 )
|
174
|
|
- m_a[linestart + ( x0i + 1 )] += d * ( 1.0f - a0 - am );
|
|
223
|
+ m_a[linestart + ( x0i + 1 )] += d * ( 1<<6 - a0 - am );
|
175
|
224
|
else
|
176
|
225
|
{
|
177
|
|
- float a1 = s * ( 1.5f - x0f );
|
|
226
|
+ //float a1 = s * ( 1.5f - x0f );
|
|
227
|
+ int a1 = ((1<<6 + 1<<5 - x0f) << 6) / oneOverS;
|
|
228
|
+
|
178
|
229
|
m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
|
|
230
|
+
|
|
231
|
+ int dTimesS = (d << 12) / oneOverS;
|
179
|
232
|
for ( int xi = x0i + 2; xi < x1i - 1; xi++ )
|
180
|
|
- m_a[linestart + xi] += d * s;
|
181
|
|
- float a2 = a1 + ( x1i - x0i - 3 ) * s;
|
|
233
|
+ m_a[linestart + xi] += dTimesS;
|
|
234
|
+
|
|
235
|
+ float a2 = a1 + (( x1i - x0i - 3 )<<12) / oneOverS;
|
|
236
|
+
|
182
|
237
|
m_a[linestart + ( x1i - 1 )] += d * ( 1.0f - a2 - am );
|
183
|
238
|
}
|
|
239
|
+
|
184
|
240
|
m_a[linestart + x1i] += d * am;
|
185
|
241
|
}
|
186
|
242
|
x = xnext;
|
... |
... |
@@ -374,27 +430,54 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target ) |
374
|
430
|
unsigned char* dest = target->buffer;
|
375
|
431
|
unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
|
376
|
432
|
|
377
|
|
- __m128 offset = _mm_setzero_ps();
|
378
|
|
- __m128i mask = _mm_set1_epi32(0x0c080400);
|
379
|
|
- __m128 sign_mask = _mm_set1_ps(-0.f);
|
380
|
|
- for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
|
381
|
|
- __m128 x = _mm_load_ps(&source[i]);
|
382
|
|
- x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
|
383
|
|
- x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
|
384
|
|
- x = _mm_add_ps(x, offset);
|
385
|
|
- __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x)
|
386
|
|
- y = _mm_min_ps(y, _mm_set1_ps(1.0f));
|
387
|
|
- y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
|
388
|
|
- __m128i z = _mm_cvtps_epi32(y);
|
389
|
|
- z = _mm_shuffle_epi8(z, mask);
|
390
|
|
- _mm_store_ss((float *)&dest[i], (__m128)z);
|
391
|
|
- offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
|
|
433
|
+ // __m128 offset = _mm_setzero_ps();
|
|
434
|
+ // __m128i mask = _mm_set1_epi32(0x0c080400);
|
|
435
|
+ // __m128 sign_mask = _mm_set1_ps(-0.f);
|
|
436
|
+ // for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
|
|
437
|
+ // __m128 x = _mm_load_ps(&source[i]);
|
|
438
|
+ // x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
|
|
439
|
+ // x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
|
|
440
|
+ // x = _mm_add_ps(x, offset);
|
|
441
|
+ // __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x)
|
|
442
|
+ // y = _mm_min_ps(y, _mm_set1_ps(1.0f));
|
|
443
|
+ // y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
|
|
444
|
+ // __m128i z = _mm_cvtps_epi32(y);
|
|
445
|
+ // z = _mm_shuffle_epi8(z, mask);
|
|
446
|
+ // _mm_store_ss((float *)&dest[i], (__m128)z);
|
|
447
|
+ // offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
|
|
448
|
+ // }
|
|
449
|
+
|
|
450
|
+ FT20D12 valnew = 0;
|
|
451
|
+ //float value = 0.0f;
|
|
452
|
+ while ( dest < dest_end )
|
|
453
|
+ {
|
|
454
|
+ //printf("%d\n", *source);
|
|
455
|
+ valnew += *source++;
|
|
456
|
+
|
|
457
|
+ if(valnew > 0){
|
|
458
|
+ int nnew = valnew >> 4;
|
|
459
|
+
|
|
460
|
+ if(nnew>255)nnew=255;
|
|
461
|
+ *dest = (unsigned char)nnew;
|
|
462
|
+ }else{
|
|
463
|
+ *dest = 0;
|
|
464
|
+ }
|
|
465
|
+
|
|
466
|
+ // value += *source++;
|
|
467
|
+ // if ( value > 0.0f )
|
|
468
|
+ // {
|
|
469
|
+ // int n = (int)( fabs( value ) * 255.0f + 0.5f );
|
|
470
|
+ // if ( n > 255 )
|
|
471
|
+ // n = 255;
|
|
472
|
+ // *dest = (unsigned char)n;
|
|
473
|
+ // }
|
|
474
|
+ // else
|
|
475
|
+ // *dest = 0;
|
|
476
|
+ dest++;
|
392
|
477
|
}
|
393
|
478
|
|
394
|
|
- // float value = 0.0f;
|
395
|
|
- // while ( dest < dest_end )
|
396
|
|
- // {
|
397
|
|
- // value += *source++;
|
|
479
|
+
|
|
480
|
+
|
398
|
481
|
// if ( value > 0.0f )
|
399
|
482
|
// {
|
400
|
483
|
// int n = (int)( fabs( value ) * 255.0f + 0.5f );
|