[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 13/14: volk: fixed a problem with acos during some translation in the git history.
From: git
Subject: [Commit-gnuradio] [gnuradio] 13/14: volk: fixed a problem with acos during some translation in the git history.
Date: Wed, 15 Oct 2014 23:25:09 +0000 (UTC)
This is an automated email from the git hooks/post-receive script.
trondeau pushed a commit to branch master
in repository gnuradio.
commit 06e0398f4747c2c81f28c6cb9b5796c4fb6de8f6
Author: Tom Rondeau <address@hidden>
Date: Wed Oct 15 12:53:10 2014 -0400
volk: fixed a problem with acos during some translation in the git history.
---
volk/kernels/volk/volk_32f_acos_32f.h | 238 +++++++++++++++++-----------------
volk/kernels/volk/volk_32f_asin_32f.h | 2 +-
2 files changed, 119 insertions(+), 121 deletions(-)
diff --git a/volk/kernels/volk/volk_32f_acos_32f.h
b/volk/kernels/volk/volk_32f_acos_32f.h
index 945ba39..18985f2 100644
--- a/volk/kernels/volk/volk_32f_acos_32f.h
+++ b/volk/kernels/volk/volk_32f_acos_32f.h
@@ -18,67 +18,67 @@
*/
static inline void volk_32f_acos_32f_a_sse4_1(float* bVector, const float*
aVector, unsigned int num_points){
- float* bPtr = bVector;
- const float* aPtr = aVector;
-
- unsigned int number = 0;
- unsigned int quarterPoints = num_points / 4;
- int i, j;
-
- __m128 aVal, d, pi, pio2, x, y, z, arccosine;
- __m128 fzeroes, fones, ftwos, ffours, condition;
-
- pi = _mm_set1_ps(3.14159265358979323846);
- pio2 = _mm_set1_ps(3.14159265358979323846/2);
- fzeroes = _mm_setzero_ps();
- fones = _mm_set1_ps(1.0);
- ftwos = _mm_set1_ps(2.0);
- ffours = _mm_set1_ps(4.0);
-
- for(;number < quarterPoints; number++){
- aVal = _mm_load_ps(aPtr);
- d = aVal;
- aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal),
_mm_sub_ps(fones, aVal))), aVal);
- z = aVal;
- condition = _mm_cmplt_ps(z, fzeroes);
- z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
- x = z;
- condition = _mm_cmplt_ps(z, fones);
- x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z),
condition));
-
- for(i = 0; i < 2; i++){
- x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x,
x))));
- }
- x = _mm_div_ps(fones, x);
- y = fzeroes;
- for(j = ACOS_TERMS - 1; j >=0 ; j--){
- y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)),
_mm_set1_ps(pow(-1,j)/(2*j+1)));
- }
-
- y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
- condition = _mm_cmpgt_ps(z, fones);
-
- y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)),
condition));
- arccosine = y;
- condition = _mm_cmplt_ps(aVal, fzeroes);
- arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine,
ftwos), condition));
- condition = _mm_cmplt_ps(d, fzeroes);
- arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
-
- _mm_store_ps(bPtr, arccosine);
- aPtr += 4;
- bPtr += 4;
- }
-
- number = quarterPoints * 4;
- for(;number < num_points; number++){
- *bPtr++ = acos(*aPtr++);
- }
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+
+ unsigned int number = 0;
+ unsigned int quarterPoints = num_points / 4;
+ int i, j;
+
+ __m128 aVal, d, pi, pio2, x, y, z, arccosine;
+ __m128 fzeroes, fones, ftwos, ffours, condition;
+
+ pi = _mm_set1_ps(3.14159265358979323846);
+ pio2 = _mm_set1_ps(3.14159265358979323846/2);
+ fzeroes = _mm_setzero_ps();
+ fones = _mm_set1_ps(1.0);
+ ftwos = _mm_set1_ps(2.0);
+ ffours = _mm_set1_ps(4.0);
+
+ for(;number < quarterPoints; number++){
+ aVal = _mm_load_ps(aPtr);
+ d = aVal;
+ aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal),
_mm_sub_ps(fones, aVal))), aVal);
+ z = aVal;
+ condition = _mm_cmplt_ps(z, fzeroes);
+ z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
+ x = z;
+ condition = _mm_cmplt_ps(z, fones);
+ x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z),
condition));
+
+ for(i = 0; i < 2; i++)
+ x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
+ x = _mm_div_ps(fones, x);
+ y = fzeroes;
+ for(j = ACOS_TERMS - 1; j >=0 ; j--)
+ y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)),
_mm_set1_ps(pow(-1,j)/(2*j+1)));
+
+ y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
+ condition = _mm_cmpgt_ps(z, fones);
+
+ y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)),
condition));
+ arccosine = y;
+ condition = _mm_cmplt_ps(aVal, fzeroes);
+ arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, ftwos),
condition));
+ condition = _mm_cmplt_ps(d, fzeroes);
+ arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
+
+ _mm_store_ps(bPtr, arccosine);
+ aPtr += 4;
+ bPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *bPtr++ = acos(*aPtr++);
+ }
}
#endif /* LV_HAVE_SSE4_1 for aligned */
-#endif /* INCLUDED_volk_32f_acos_32_f_H */
+#endif /* INCLUDED_volk_32f_acos_32f_a_H */
+
+
#ifndef INCLUDED_volk_32f_acos_32f_u_H
#define INCLUDED_volk_32f_acos_32f_u_H
@@ -91,63 +91,61 @@ static inline void volk_32f_acos_32f_a_sse4_1(float*
bVector, const float* aVect
\param num_points Number of points for which arccosine is to be computed
*/
static inline void volk_32f_acos_32f_u_sse4_1(float* bVector, const float*
aVector, unsigned int num_points){
-
- float* bPtr = bVector;
- const float* aPtr = aVector;
-
- unsigned int number = 0;
- unsigned int quarterPoints = num_points / 4;
- int i, j;
-
- __m128 aVal, d, pi, pio2, x, y, z, arccosine;
- __m128 fzeroes, fones, ftwos, ffours, condition;
-
- pi = _mm_set1_ps(3.14159265358979323846);
- pio2 = _mm_set1_ps(3.14159265358979323846/2);
- fzeroes = _mm_setzero_ps();
- fones = _mm_set1_ps(1.0);
- ftwos = _mm_set1_ps(2.0);
- ffours = _mm_set1_ps(4.0);
-
- for(;number < quarterPoints; number++){
- aVal = _mm_loadu_ps(aPtr);
- d = aVal;
- aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal),
_mm_sub_ps(fones, aVal))), aVal);
- z = aVal;
- condition = _mm_cmplt_ps(z, fzeroes);
- z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
- x = z;
- condition = _mm_cmplt_ps(z, fones);
- x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z),
condition));
-
- for(i = 0; i < 2; i++){
- x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x,
x))));
- }
- x = _mm_div_ps(fones, x);
- y = fzeroes;
- for(j = ACOS_TERMS - 1; j >=0 ; j--){
- x = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)),
_mm_set1_ps(pow(-1,j)/(2*j+1)));
- }
-
- y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
- condition = _mm_cmpgt_ps(z, fones);
-
- y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)),
condition));
- arccosine = y;
- condition = _mm_cmplt_ps(aVal, fzeroes);
- arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine,
ftwos), condition));
- condition = _mm_cmplt_ps(d, fzeroes);
- arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
-
- _mm_storeu_ps(bPtr, arccosine);
- aPtr += 4;
- bPtr += 4;
- }
-
- number = quarterPoints * 4;
- for(;number < num_points; number++){
- *bPtr++ = acos(*aPtr++);
- }
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+
+ unsigned int number = 0;
+ unsigned int quarterPoints = num_points / 4;
+ int i, j;
+
+ __m128 aVal, d, pi, pio2, x, y, z, arccosine;
+ __m128 fzeroes, fones, ftwos, ffours, condition;
+
+ pi = _mm_set1_ps(3.14159265358979323846);
+ pio2 = _mm_set1_ps(3.14159265358979323846/2);
+ fzeroes = _mm_setzero_ps();
+ fones = _mm_set1_ps(1.0);
+ ftwos = _mm_set1_ps(2.0);
+ ffours = _mm_set1_ps(4.0);
+
+ for(;number < quarterPoints; number++){
+ aVal = _mm_loadu_ps(aPtr);
+ d = aVal;
+ aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal),
_mm_sub_ps(fones, aVal))), aVal);
+ z = aVal;
+ condition = _mm_cmplt_ps(z, fzeroes);
+ z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
+ x = z;
+ condition = _mm_cmplt_ps(z, fones);
+ x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z),
condition));
+
+ for(i = 0; i < 2; i++)
+ x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
+ x = _mm_div_ps(fones, x);
+ y = fzeroes;
+
+ for(j = ACOS_TERMS - 1; j >=0 ; j--)
+ y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)),
_mm_set1_ps(pow(-1,j)/(2*j+1)));
+
+ y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
+ condition = _mm_cmpgt_ps(z, fones);
+
+ y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)),
condition));
+ arccosine = y;
+ condition = _mm_cmplt_ps(aVal, fzeroes);
+ arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, ftwos),
condition));
+ condition = _mm_cmplt_ps(d, fzeroes);
+ arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
+
+ _mm_storeu_ps(bPtr, arccosine);
+ aPtr += 4;
+ bPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *bPtr++ = acos(*aPtr++);
+ }
}
#endif /* LV_HAVE_SSE4_1 for aligned */
@@ -160,13 +158,13 @@ static inline void volk_32f_acos_32f_u_sse4_1(float*
bVector, const float* aVect
\param num_points Number of points for which arccosine is to be computed
*/
static inline void volk_32f_acos_32f_generic(float* bVector, const float*
aVector, unsigned int num_points){
- float* bPtr = bVector;
- const float* aPtr = aVector;
- unsigned int number = 0;
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+ unsigned int number = 0;
- for(number = 0; number < num_points; number++){
- *bPtr++ = acos(*aPtr++);
- }
+ for(number = 0; number < num_points; number++){
+ *bPtr++ = acos(*aPtr++);
+ }
}
#endif /* LV_HAVE_GENERIC */
diff --git a/volk/kernels/volk/volk_32f_asin_32f.h
b/volk/kernels/volk/volk_32f_asin_32f.h
index 2bae3a5..d7322a4 100644
--- a/volk/kernels/volk/volk_32f_asin_32f.h
+++ b/volk/kernels/volk/volk_32f_asin_32f.h
@@ -164,4 +164,4 @@ static inline void volk_32f_asin_32f_u_generic(float*
bVector, const float* aVec
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_32f_asin_32f_a_H */
+#endif /* INCLUDED_volk_32f_asin_32f_u_H */
- [Commit-gnuradio] [gnuradio] branch master updated (d50c57a -> 8ebe90f), git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 03/14: volk: temp log kernels., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 04/14: volk: Added log2, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 06/14: volk: expfast comments, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 08/14: volk: Added sin, cos kernels., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 12/14: volk: fixed some warnings, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 13/14: volk: fixed a problem with acos during some translation in the git history.,
git <=
- [Commit-gnuradio] [gnuradio] 09/14: volk: Added tan kernel., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 07/14: volk: added power kernel., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 10/14: volk: Added atan, asin, acos kernels., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 01/14: added new proto-kernels, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 05/14: volk: Added avx proto-kernel for fast exp., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 02/14: volk: Added proto-kernels for convert, multiply, conjugate, deinterleave, magnitude, mag-square, psd functions., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 11/14: volk (gsoc): whitespace, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 14/14: volk: adding copyright notice to all volk kernels., git, 2014/10/15