[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 04/14: volk: Added log2
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 04/14: volk: Added log2 |
Date: |
Wed, 15 Oct 2014 23:25:08 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
trondeau pushed a commit to branch master
in repository gnuradio.
commit a43d53348d5fabdaeccca1c3ad2c661e1d292e79
Author: Abhishek Bhowmick <address@hidden>
Date: Mon Jun 9 09:51:05 2014 +0530
volk: Added log2
volk: Added fast inaccurate exp kernel.
---
volk/apps/volk_profile.cc | 2 +
volk/kernels/volk/volk_32f_expfast_32f.h | 138 ++++++++++++++
volk/kernels/volk/volk_32f_log2_32f.h | 210 ++++++++++++++++++++++
volk/kernels/volk/volk_32f_log2_32f_mine.cc | 169 -----------------
volk/kernels/volk/volk_32f_log2_32f_mitLicense.cc | 115 ------------
volk/lib/testqa.cc | 2 +
6 files changed, 352 insertions(+), 284 deletions(-)
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 4168847..08c31cb 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -162,6 +162,8 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32fc_32f_multiply_32fc, 1e-4, 0, 204602, 1000, &results,
benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_32f_log2_32f, 1e-3, 0, 204602, 1000, &results,
benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_32f_expfast_32f, 1e-1, 0, 204602, 1000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32fc_s32f_power_32fc, 1e-4, 0, 204602, 50, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f, 1e-4, 20.0,
204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32fc_s32f_atan2_32f, 1e-4, 10.0, 204602, 100, &results,
benchmark_mode, kernel_regex);
diff --git a/volk/kernels/volk/volk_32f_expfast_32f.h
b/volk/kernels/volk/volk_32f_expfast_32f.h
new file mode 100644
index 0000000..0826527
--- /dev/null
+++ b/volk/kernels/volk/volk_32f_expfast_32f.h
@@ -0,0 +1,138 @@
+#include <stdio.h>
+#include <math.h>
+#include <inttypes.h>
+#include <immintrin.h>
+
+#define Mln2 0.6931471805f
+#define A 8388608.0f
+#define B 1065353216.0f
+#define C 60801.0f
+
+
+#ifndef INCLUDED_volk_32f_expfast_32f_a_H
+#define INCLUDED_volk_32f_expfast_32f_a_H
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*!
+ \brief Computes fast exp (max 7% error) of input vector and stores results
in output vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_expfast_32f_a_sse4_1(float* bVector, const float*
aVector, unsigned int num_points){
+
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ __m128 aVal, bVal, a, b;
+ __m128i exp;
+ a = _mm_set1_ps(A/Mln2);
+ b = _mm_set1_ps(B-C);
+
+ for(;number < quarterPoints; number++){
+ aVal = _mm_load_ps(aPtr);
+ exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
+ bVal = _mm_castsi128_ps(exp);
+
+ _mm_store_ps(bPtr, bVal);
+ aPtr += 4;
+ bPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *bPtr++ = expf(*aPtr++);
+ }
+}
+
+#endif /* LV_HAVE_SSE4_1 for aligned */
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Computes fast exp (max 7% error) of input vector and stores results
in output vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_expfast_32f_a_generic(float* bVector, const float*
aVector, unsigned int num_points){
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *bPtr++ = expf(*aPtr++);
+ }
+
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32f_expfast_32f_a_H */
+
+#ifndef INCLUDED_volk_32f_expfast_32f_u_H
+#define INCLUDED_volk_32f_expfast_32f_u_H
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*!
+ \brief Computes fast exp (max 7% error) of input vector and stores results
in output vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_expfast_32f_u_sse4_1(float* bVector, const float*
aVector, unsigned int num_points){
+
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ __m128 aVal, bVal, a, b;
+ __m128i exp;
+ a = _mm_set1_ps(A/Mln2);
+ b = _mm_set1_ps(B-C);
+
+ for(;number < quarterPoints; number++){
+ aVal = _mm_loadu_ps(aPtr);
+ exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
+ bVal = _mm_castsi128_ps(exp);
+
+ _mm_storeu_ps(bPtr, bVal);
+ aPtr += 4;
+ bPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *bPtr++ = expf(*aPtr++);
+ }
+}
+
+#endif /* LV_HAVE_SSE4_1 for unaligned */
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Computes fast exp (max 7% error) of input vector and stores results
in output vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_expfast_32f_u_generic(float* bVector, const float*
aVector, unsigned int num_points){
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *bPtr++ = expf(*aPtr++);
+ }
+
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_32f_expfast_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_log2_32f.h
b/volk/kernels/volk/volk_32f_log2_32f.h
new file mode 100644
index 0000000..c6caceb
--- /dev/null
+++ b/volk/kernels/volk/volk_32f_log2_32f.h
@@ -0,0 +1,210 @@
+/*
+ * This kernel was adapted from Jose Fonseca's Fast SSE2 log implementation
+ * http://jrfonseca.blogspot.in/2008/09/fast-sse2-pow-tables-or-polynomials.htm
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ * This is the MIT License (MIT)
+
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <math.h>
+
+#define POLY0(x, c0) _mm_set1_ps(c0)
+#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x),
_mm_set1_ps(c0))
+#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x),
_mm_set1_ps(c0))
+#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3),
x), _mm_set1_ps(c0))
+#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2,
c3, c4), x), _mm_set1_ps(c0))
+#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1,
c2, c3, c4, c5), x), _mm_set1_ps(c0))
+
+#define LOG_POLY_DEGREE 3
+
+
+#ifndef INCLUDED_volk_32f_log2_32f_a_H
+#define INCLUDED_volk_32f_log2_32f_a_H
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Computes base 2 log of input vector and stores results in output
vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_log2_32f_generic(float* bVector, const float*
aVector, unsigned int num_points){
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *bPtr++ = log2(*aPtr++);
+ }
+
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*!
+ \brief Computes base 2 log of input vector and stores results in output
vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_log2_32f_a_sse4_1(float* bVector, const float*
aVector, unsigned int num_points){
+
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ __m128 aVal, bVal, mantissa, frac, leadingOne;
+ __m128i bias, exp;
+
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_load_ps(aPtr);
+ bias = _mm_set1_epi32(127);
+ leadingOne = _mm_set1_ps(1.0f);
+ exp =
_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(_mm_castps_si128(aVal),
_mm_set1_epi32(0x7f800000)), 23), bias);
+ bVal = _mm_cvtepi32_ps(exp);
+
+ // Now to extract mantissa
+ frac = _mm_or_ps(leadingOne, _mm_and_ps(aVal,
_mm_castsi128_ps(_mm_set1_epi32(0x7fffff))));
+
+ #if LOG_POLY_DEGREE == 6
+ mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f,
-1.2315303f, 3.1821337e-1f, -3.4436006e-2f);
+ #elif LOG_POLY_DEGREE == 5
+ mantissa = POLY4( frac, 2.8882704548164776201f,
-2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f,
0.0596515482674574969533f);
+ #elif LOG_POLY_DEGREE == 4
+ mantissa = POLY3( frac, 2.61761038894603480148f,
-1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
+ #elif LOG_POLY_DEGREE == 3
+ mantissa = POLY2( frac, 2.28330284476918490682f,
-1.04913055217340124191f, 0.204446009836232697516f);
+ #else
+ #error
+ #endif
+
+ bVal = _mm_add_ps(bVal, _mm_mul_ps(mantissa, _mm_sub_ps(frac,
leadingOne)));
+ _mm_store_ps(bPtr, bVal);
+
+
+ aPtr += 4;
+ bPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *bPtr++ = log2(*aPtr++);
+ }
+}
+
+#endif /* LV_HAVE_SSE4_1 for aligned */
+
+#endif /* INCLUDED_volk_32f_log2_32f_a_H */
+
+#ifndef INCLUDED_volk_32f_log2_32f_u_H
+#define INCLUDED_volk_32f_log2_32f_u_H
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Computes base 2 log of input vector and stores results in output
vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_log2_32f_u_generic(float* bVector, const float*
aVector, unsigned int num_points){
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *bPtr++ = log2(*aPtr++);
+ }
+
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*!
+ \brief Computes base 2 log of input vector and stores results in output
vector
+ \param bVector The vector where results will be stored
+ \param aVector The input vector of floats
+ \param num_points Number of points for which log is to be computed
+*/
+static inline void volk_32f_log2_32f_u_sse4_1(float* bVector, const float*
aVector, unsigned int num_points){
+
+ float* bPtr = bVector;
+ const float* aPtr = aVector;
+
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ __m128 aVal, bVal, mantissa, frac, leadingOne;
+ __m128i bias, exp;
+
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_loadu_ps(aPtr);
+ bias = _mm_set1_epi32(127);
+ leadingOne = _mm_set1_ps(1.0f);
+ exp =
_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(_mm_castps_si128(aVal),
_mm_set1_epi32(0x7f800000)), 23), bias);
+ bVal = _mm_cvtepi32_ps(exp);
+
+ // Now to extract mantissa
+ frac = _mm_or_ps(leadingOne, _mm_and_ps(aVal,
_mm_castsi128_ps(_mm_set1_epi32(0x7fffff))));
+
+ #if LOG_POLY_DEGREE == 6
+ mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f,
-1.2315303f, 3.1821337e-1f, -3.4436006e-2f);
+ #elif LOG_POLY_DEGREE == 5
+ mantissa = POLY4( frac, 2.8882704548164776201f,
-2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f,
0.0596515482674574969533f);
+ #elif LOG_POLY_DEGREE == 4
+ mantissa = POLY3( frac, 2.61761038894603480148f,
-1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
+ #elif LOG_POLY_DEGREE == 3
+ mantissa = POLY2( frac, 2.28330284476918490682f,
-1.04913055217340124191f, 0.204446009836232697516f);
+ #else
+ #error
+ #endif
+
+ bVal = _mm_add_ps(bVal, _mm_mul_ps(mantissa, _mm_sub_ps(frac,
leadingOne)));
+ _mm_storeu_ps(bPtr, bVal);
+
+
+ aPtr += 4;
+ bPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *bPtr++ = log2(*aPtr++);
+ }
+}
+
+#endif /* LV_HAVE_SSE4_1 for unaligned */
+
+#endif /* INCLUDED_volk_32f_log2_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_log2_32f_mine.cc
b/volk/kernels/volk/volk_32f_log2_32f_mine.cc
deleted file mode 100644
index 94d3daa..0000000
--- a/volk/kernels/volk/volk_32f_log2_32f_mine.cc
+++ /dev/null
@@ -1,169 +0,0 @@
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-#include <iostream>
-#include <immintrin.h>
-
-void print128_num(__m128i var)
-{
- uint32_t *val = (uint32_t*) &var;//can also use uint32_t instead of 16_t
- printf("Vector of Ints: %x %x %d %d \n",
- val[3], val[2], val[1], val[0]);
-}
-
-void print128f_num(__m128 var)
-{
- float *val = (float*) &var;//can also use uint32_t instead of 16_t
- printf("Vector of Floats: %f %f %f %f \n",
- val[3], val[2], val[1], val[0]);
-}
-
-static inline void volk_32f_log2_32f_a_avx(float* bVector, float* aVector,
unsigned int num_points){
-
- float* bPtr = bVector;
- float* aPtr = aVector;
- int exp1 = 0x7f800000;
-
- __m128 aVal, bVal, charac, mantissa;
- __m128i bias = _mm_set1_epi32(127);
- __m128i aVali = _mm_castps_si128(aVal);
- __m128i exp = _mm_set1_epi32(exp1);
- aVal = _mm_load_ps(aPtr);
- exp = _mm_and_si128(aVali, exp);
- exp = _mm_srli_epi32(exp, 23);
- exp = _mm_sub_epi32(exp, bias);
- charac = _mm_cvtepi32_ps(exp);
-
- // Now to extract mantissa
- int frac1 = 0x007fffff;
- __m128i fracMask = _mm_set1_epi32(frac1);
- __m128 frac = _mm_castsi128_ps(fracMask);
- frac = _mm_and_ps(aVal, frac);
- __m128 leadingOne = _mm_set1_ps(1.0f);
- frac = _mm_or_ps(leadingOne, frac);
-
-
- // Add the decimal bits one by one, can use LUT to speed up
- __m128 compareResults, compareResultsNot, compareTo, fracByTwo, allOnes, a,
b, c;
- __m128i allOnesi;
- allOnesi = _mm_set1_epi32(0xffffffff);
- allOnes = _mm_castsi128_ps(allOnesi);
- compareTo = _mm_set1_ps(2.0);
-
- mantissa = _mm_set1_ps(0.5);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- //b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_sub_ps(frac, a);
- c = _mm_and_ps(compareResults, mantissa);
- charac = _mm_add_ps(charac, c);
-
-
- mantissa = _mm_set1_ps(0.25);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- //b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_sub_ps(frac, a);
- charac = _mm_add_ps(charac, _mm_and_ps(compareResults, mantissa));
-
-
- mantissa = _mm_set1_ps(0.125);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_add_ps(a, b);
- charac = _mm_add_ps(charac, _mm_and_ps(compareResults, mantissa));
-
-/*
- mantissa = _mm_set1_ps(0.0625);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_add_ps(a, b);
- charac = _mm_add_ps(charac, _mm_and_ps(compareResults, mantissa));
-
- mantissa = _mm_set1_ps(0.03125);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_add_ps(a, b);
- charac = _mm_add_ps(charac, _mm_and_ps(compareResults, mantissa));
-
- mantissa = _mm_set1_ps(0.015625);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_add_ps(a, b);
- charac = _mm_add_ps(charac, _mm_and_ps(compareResults, mantissa));
-
- mantissa = _mm_set1_ps(0.0078125);
- frac = _mm_mul_ps(frac, frac);
- compareResults = _mm_cmpge_ps(frac, compareTo); // ge compare
- compareResultsNot = _mm_andnot_ps(compareResults, allOnes);
- fracByTwo = _mm_div_ps(frac, compareTo);
- a = _mm_and_ps(compareResults, fracByTwo);
- b = _mm_and_ps(compareResultsNot, frac);
- frac = _mm_add_ps(a, b);
- charac = _mm_add_ps(charac, _mm_and_ps(compareResults, mantissa));
-*/
- _mm_store_ps(bPtr, charac);
-}
-
-
-int main(){
-
-clock_t start, end;
-double time;
-float dummy;
-//float input[] = {0.5, 1.0, 4.0, 9.5};
-float* input = new float(4*sizeof(float));
-input[0] = 0.125;
-input[1] = 2.4;
-input[2] = 8.0;
-input[3] = 16.0;
-float* output = new float(4*sizeof(float));
-
-start = clock();
-for(int i = 0; i < 10000000; i++)
- volk_32f_log2_32f_a_avx(output, input, 4);
-end = clock();
-time = 1000 * (double)(end-start);
-std::cout << "my time is " << time << std::endl;
-
-start = clock();
-for(int j = 0; j < 40000000; j++)
- dummy = log2(input[0]);
-end = clock();
-time = 1000 * (double)(end-start);
-std::cout << "cmath time is " << time << std::endl;
-
-
-for(int i =0; i < 4; i ++){
- std::cout << input[i] << "\t" << output[i];
- std::cout << " cmath : \t " << log2(input[i]) << std::endl;
-}
-
-delete input;
-delete output;
-return 0;
-
-}
diff --git a/volk/kernels/volk/volk_32f_log2_32f_mitLicense.cc
b/volk/kernels/volk/volk_32f_log2_32f_mitLicense.cc
deleted file mode 100644
index 7ee18cc..0000000
--- a/volk/kernels/volk/volk_32f_log2_32f_mitLicense.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-#include <iostream>
-#include <immintrin.h>
-
-
-#define POLY0(x, c0) _mm_set1_ps(c0)
-#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x),
_mm_set1_ps(c0))
-#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x),
_mm_set1_ps(c0))
-#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3),
x), _mm_set1_ps(c0))
-#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2,
c3, c4), x), _mm_set1_ps(c0))
-#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1,
c2, c3, c4, c5), x), _mm_set1_ps(c0))
-
-#define LOG_POLY_DEGREE 3
-
-void print128_num(__m128i var)
-{
- uint32_t *val = (uint32_t*) &var;//can also use uint32_t instead of 16_t
- printf("Vector of Ints: %x %x %d %d \n",
- val[3], val[2], val[1], val[0]);
-}
-
-void print128f_num(__m128 var)
-{
- float *val = (float*) &var;//can also use uint32_t instead of 16_t
- printf("Vector of Floats: %f %f %f %f \n",
- val[3], val[2], val[1], val[0]);
-}
-
-static inline void volk_32f_log2_32f_a_avx(float* bVector, float* aVector,
unsigned int num_points){
-
- float* bPtr = bVector;
- float* aPtr = aVector;
- int exp1 = 0x7f800000;
-
- __m128 aVal, bVal, charac, mantissa, frac;
- __m128i bias = _mm_set1_epi32(127);
- __m128i aVali = _mm_castps_si128(aVal);
- __m128i exp = _mm_set1_epi32(exp1);
- aVal = _mm_load_ps(aPtr);
- exp = _mm_and_si128(aVali, exp);
- exp = _mm_srli_epi32(exp, 23);
- exp = _mm_sub_epi32(exp, bias);
- charac = _mm_cvtepi32_ps(exp);
-
- // Now to extract mantissa
- int frac1 = 0x007fffff;
- __m128i fracMask = _mm_set1_epi32(frac1);
- frac = _mm_castsi128_ps(fracMask);
- frac = _mm_and_ps(aVal, frac);
- __m128 leadingOne = _mm_set1_ps(1.0f);
- frac = _mm_or_ps(leadingOne, frac);
-
-
- // This is where MIT Licensed code starts
- #if LOG_POLY_DEGREE == 6
- mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f, -1.2315303f,
3.1821337e-1f, -3.4436006e-2f);
-#elif LOG_POLY_DEGREE == 5
- mantissa = POLY4( frac, 2.8882704548164776201f, -2.52074962577807006663f,
1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f);
-#elif LOG_POLY_DEGREE == 4
- mantissa = POLY3( frac, 2.61761038894603480148f, -1.75647175389045657003f,
0.688243882994381274313f, -0.107254423828329604454f);
-#elif LOG_POLY_DEGREE == 3
- mantissa = POLY2( frac, 2.28330284476918490682f, -1.04913055217340124191f,
0.204446009836232697516f);
-#else
-#error
-#endif
-
- mantissa = _mm_mul_ps(mantissa, _mm_sub_ps(frac, leadingOne));
-
- bVal = _mm_add_ps(charac, mantissa);
-
- _mm_store_ps(bPtr, bVal);
-}
-
-
-int main(){
-
-clock_t start, end;
-double time;
-float dummy;
-//float input[] = {0.5, 1.0, 4.0, 9.5};
-float* input = new float(4*sizeof(float));
-input[0] = 0.125;
-input[1] = 2.4;
-input[2] = 8.0;
-input[3] = 16.0;
-float* output = new float(4*sizeof(float));
-
-start = clock();
-for(int i = 0; i < 10000000; i++)
- volk_32f_log2_32f_a_avx(output, input, 4);
-end = clock();
-time = 1000 * (double)(end-start);
-std::cout << "my time is " << time << std::endl;
-
-start = clock();
-for(int j = 0; j < 40000000; j++)
- dummy = log2(input[0]);
-end = clock();
-time = 1000 * (double)(end-start);
-std::cout << "cmath time is " << time << std::endl;
-
-
-for(int i =0; i < 4; i ++){
- std::cout << input[i] << "\t" << output[i];
- std::cout << " cmath : \t " << log2(input[i]) << std::endl;
-}
-
-delete input;
-delete output;
-return 0;
-
-}
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
index 9d83751..bdea584 100644
--- a/volk/lib/testqa.cc
+++ b/volk/lib/testqa.cc
@@ -44,6 +44,8 @@ VOLK_RUN_TESTS(volk_16u_byteswap, 0, 0, 20462, 1);
VOLK_RUN_TESTS(volk_32f_accumulator_s32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_32f_x2_add_32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc, 1e-4, 0, 20462, 1);
+VOLK_RUN_TESTS(volk_32f_log2_32f, 1e-3, 0, 20462, 1);
+VOLK_RUN_TESTS(volk_32f_expfast_32f, 1e-1, 0, 20462, 1);
VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f, 1e-4, 20.0, 20462,
1);
VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f, 1e-4, 10.0, 20462, 1);
- [Commit-gnuradio] [gnuradio] branch master updated (d50c57a -> 8ebe90f), git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 03/14: volk: temp log kernels., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 04/14: volk: Added log2,
git <=
- [Commit-gnuradio] [gnuradio] 06/14: volk: expfast comments, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 08/14: volk: Added sin, cos kernels., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 12/14: volk: fixed some warnings, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 13/14: volk: fixed a problem with acos during some translation in the git history., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 09/14: volk: Added tan kernel., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 07/14: volk: added power kernel., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 10/14: volk: Added atan, asin, acos kernels., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 01/14: added new proto-kernels, git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 05/14: volk: Added avx proto-kernel for fast exp., git, 2014/10/15
- [Commit-gnuradio] [gnuradio] 02/14: volk: Added proto-kernels for convert, multiply, conjugate, deinterleave, magnitude, mag-square, psd functions., git, 2014/10/15