[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc |
Date: |
Fri, 31 Oct 2014 19:22:30 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
jcorgan pushed a commit to branch master
in repository gnuradio.
commit c84890860e506ab0be0014674401910ac80ec267
Author: Nathan West <address@hidden>
Date: Sat Oct 18 22:27:52 2014 -0500
volk: add neon kernel for 16i_32fc_dot_prod_32fc
---
volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h | 48 ++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
b/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
index 27f0bf6..2656d76 100644
--- a/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
+++ b/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
@@ -29,7 +29,6 @@
#ifdef LV_HAVE_GENERIC
-
static inline void volk_16i_32fc_dot_prod_32fc_generic(lv_32fc_t* result,
const short* input, const lv_32fc_t * taps, unsigned int num_points) {
static const int N_UNROLL = 4;
@@ -58,7 +57,54 @@ static inline void
volk_16i_32fc_dot_prod_32fc_generic(lv_32fc_t* result, const
#endif /*LV_HAVE_GENERIC*/
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+static inline void volk_16i_32fc_dot_prod_32fc_neon(lv_32fc_t* result, const
short* input, const lv_32fc_t * taps, unsigned int num_points) {
+ // Accumulates input[i] * taps[i] over num_points samples and writes the sum to *result.
+ unsigned ii;
+ unsigned quarter_points = num_points / 4; // number of full 4-sample vector iterations
+ lv_32fc_t* tapsPtr = (lv_32fc_t*) taps; // cast drops const; taps are only read below
+ short* inputPtr = (short*) input; // cast drops const; input is only read below
+ lv_32fc_t accumulator_vec[4]; // scratch that receives the four per-lane partial sums
+
+ float32x4x2_t tapsVal, accumulator_val; // val[0] / val[1] are used as the real / imaginary halves
+ int16x4_t input16;
+ int32x4_t input32;
+ float32x4_t input_float, prod_re, prod_im;
+
+ accumulator_val.val[0] = vdupq_n_f32(0.0); // running sums start at zero
+ accumulator_val.val[1] = vdupq_n_f32(0.0);
+
+ for(ii = 0; ii < quarter_points; ++ii) {
+ tapsVal = vld2q_f32((float*)tapsPtr); // de-interleaving load of 4 taps; assumes lv_32fc_t is a (real, imag) float pair - TODO confirm
+ input16 = vld1_s16(inputPtr); // load 4 16-bit input samples
+ // widen 16-bit int to 32-bit int
+ input32 = vmovl_s16(input16);
+ // convert 32-bit int to float (plain conversion; no scaling is applied)
+ input_float = vcvtq_f32_s32(input32);
+
+ prod_re = vmulq_f32(input_float, tapsVal.val[0]); // real-valued input times val[0] half of the taps
+ prod_im = vmulq_f32(input_float, tapsVal.val[1]); // real-valued input times val[1] half of the taps
+
+ accumulator_val.val[0] = vaddq_f32(prod_re, accumulator_val.val[0]);
+ accumulator_val.val[1] = vaddq_f32(prod_im, accumulator_val.val[1]);
+
+ tapsPtr += 4; // advance both streams by the 4 samples just consumed
+ inputPtr += 4;
+ }
+ vst2q_f32((float*)accumulator_vec, accumulator_val); // re-interleaving store: accumulator_vec[k] gets lane k of both halves
+ accumulator_vec[0] += accumulator_vec[1]; // fold the four lane sums into accumulator_vec[0]
+ accumulator_vec[2] += accumulator_vec[3];
+ accumulator_vec[0] += accumulator_vec[2];
+
+ for(ii = quarter_points * 4; ii < num_points; ++ii) { // scalar tail for the remaining num_points % 4 samples
+ accumulator_vec[0] += *(tapsPtr++) * (float)(*(inputPtr++)); // pointers already sit past the vectorized part
+ }
+
+ *result = accumulator_vec[0];
+}
+#endif /*LV_HAVE_NEON*/
#if LV_HAVE_SSE && LV_HAVE_MMX
- [Commit-gnuradio] [gnuradio] branch master updated (b3bbe56 -> 4869607), git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 03/22: volk: removing executable bit for volk_32f_x2_pow_32f.h, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 04/22: volk: add neon support for 32fc_s32fc_multiply_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 12/22: volk: update profile to use the new 32u_byteswap puppet, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 06/22: volk: adding popcnt puppets to qa, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 09/22: volk: add neon version for 32f_binary_slicer_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 08/22: volk: add neon protokernel for 16i_s32f_convert_32f., git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc,
git <=
- [Commit-gnuradio] [gnuradio] 11/22: volk: add neon version of 32u_byteswap, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 10/22: volk: removed unused variable from neon binary slicer, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 01/22: volk: add neon kernel for 16i_convert_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 14/22: volk: fixing 32u_byteswap puppet for SSE, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 13/22: volk: add neon versions for 32i bitwise operators, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 22/22: Merge commit '0c92479f', git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 19/22: volk: add neon log2 implementation and fix QA to properly test, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 15/22: volk: add neon version of complex<float> dot product, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 17/22: volk: add a neon table version of 16u_byteswap, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 21/22: volk: relax log2 qa constraints and use a higher order polynomial, git, 2014/10/31