[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f
From: git
Subject: [Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers
Date: Fri, 31 Oct 2014 19:22:30 +0000 (UTC)
This is an automated email from the git hooks/post-receive script.
jcorgan pushed a commit to branch master
in repository gnuradio.
commit 58432bad48831ae2f3f14fa5d3a8472d7c55ffae
Author: Nathan West <address@hidden>
Date: Fri Oct 17 21:04:41 2014 -0500
volk: add neon kernels for 32fc->32f deinterleavers
---
volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h | 33 ++++++++++++++++++++++
.../kernels/volk/volk_32fc_deinterleave_imag_32f.h | 29 +++++++++++++++++++
.../kernels/volk/volk_32fc_deinterleave_real_32f.h | 26 +++++++++++++++++
3 files changed, 88 insertions(+)
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
b/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
index 5b485ec..6874966 100644
--- a/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
+++ b/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
@@ -122,6 +122,39 @@ static inline void
volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* qB
}
#endif /* LV_HAVE_SSE */
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+ \brief Deinterleaves the complex vector into I & Q vector data using NEON: four complex values per vector iteration, then a scalar loop for the remaining num_points % 4 values
+ \param complexVector The complex input vector; read as 2*num_points consecutive floats (presumably interleaved I,Q pairs -- confirm against the lv_32fc_t definition)
+ \param iBuffer The I buffer output data; receives num_points floats (the even-indexed input floats)
+ \param qBuffer The Q buffer output data; receives num_points floats (the odd-indexed input floats)
+ \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer, float*
qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ unsigned int quarter_points = num_points / 4; /* count of full 4-value vector iterations */
+ const float* complexVectorPtr = (float*)complexVector; /* NOTE(review): the cast drops const before the assignment re-adds it; (const float*) would avoid a -Wcast-qual warning */
+ float* iBufferPtr = iBuffer;
+ float* qBufferPtr = qBuffer;
+ float32x4x2_t complexInput;
+
+ for(number = 0; number < quarter_points; number++){
+ complexInput = vld2q_f32(complexVectorPtr); /* loads 8 floats de-interleaved: val[0] = even-indexed, val[1] = odd-indexed */
+ vst1q_f32( iBufferPtr, complexInput.val[0] ); /* 4 floats to the I output */
+ vst1q_f32( qBufferPtr, complexInput.val[1] ); /* 4 floats to the Q output */
+ complexVectorPtr += 8; /* 4 complex values = 8 input floats consumed */
+ iBufferPtr += 4;
+ qBufferPtr += 4;
+ }
+
+ for(number = quarter_points*4; number < num_points; number++){ /* scalar tail for the values the vector loop did not cover */
+ *iBufferPtr++ = *complexVectorPtr++; /* even-indexed float -> I */
+ *qBufferPtr++ = *complexVectorPtr++; /* odd-indexed float -> Q */
+ }
+}
+#endif /* LV_HAVE_NEON */
+
#ifdef LV_HAVE_GENERIC
/*!
\brief Deinterleaves the complex vector into I & Q vector data
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
b/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
index 6fabedd..f80265d 100644
--- a/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
+++ b/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
@@ -110,6 +110,35 @@ static inline void
volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const l
}
#endif /* LV_HAVE_SSE */
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+ \brief Deinterleaves the complex vector into Q vector data using NEON: four complex values per vector iteration, then a scalar loop for the remaining num_points % 4 values
+ \param complexVector The complex input vector; read as 2*num_points consecutive floats (presumably interleaved I,Q pairs -- confirm against the lv_32fc_t definition)
+ \param qBuffer The Q buffer output data; receives num_points floats (the odd-indexed input floats)
+ \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer, const
lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ unsigned int quarter_points = num_points / 4; /* count of full 4-value vector iterations */
+ const float* complexVectorPtr = (float*)complexVector; /* NOTE(review): the cast drops const before the assignment re-adds it; (const float*) would avoid a -Wcast-qual warning */
+ float* qBufferPtr = qBuffer;
+ float32x4x2_t complexInput;
+
+ for(number = 0; number < quarter_points; number++){
+ complexInput = vld2q_f32(complexVectorPtr); /* loads 8 floats de-interleaved: val[0] = even-indexed, val[1] = odd-indexed */
+ vst1q_f32( qBufferPtr, complexInput.val[1] ); /* only the odd-indexed lane set is stored; val[0] is discarded */
+ complexVectorPtr += 8; /* 4 complex values = 8 input floats consumed */
+ qBufferPtr += 4;
+ }
+
+ for(number = quarter_points*4; number < num_points; number++){ /* scalar tail for the values the vector loop did not cover */
+ complexVectorPtr++; /* skip the even-indexed (I) float */
+ *qBufferPtr++ = *complexVectorPtr++; /* odd-indexed float -> Q */
+ }
+}
+#endif /* LV_HAVE_NEON */
+
#ifdef LV_HAVE_GENERIC
/*!
\brief Deinterleaves the complex vector into Q vector data
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
b/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
index 9200206..c0e8d8f 100644
--- a/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
+++ b/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
@@ -84,7 +84,33 @@ static inline void
volk_32fc_deinterleave_real_32f_generic(float* iBuffer, const
}
#endif /* LV_HAVE_GENERIC */
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+ \brief Deinterleaves the complex vector into I vector data using NEON: four complex values per vector iteration, then a scalar loop for the remaining num_points % 4 values
+ \param complexVector The complex input vector; read as 2*num_points consecutive floats (presumably interleaved I,Q pairs -- confirm against the lv_32fc_t definition)
+ \param iBuffer The I buffer output data; receives num_points floats (the even-indexed input floats)
+ \param num_points The number of complex data values to be deinterleaved
+*/
+static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer, const
lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ unsigned int quarter_points = num_points / 4; /* count of full 4-value vector iterations */
+ const float* complexVectorPtr = (float*)complexVector; /* NOTE(review): the cast drops const before the assignment re-adds it; (const float*) would avoid a -Wcast-qual warning */
+ float* iBufferPtr = iBuffer;
+ float32x4x2_t complexInput;
+ for(number = 0; number < quarter_points; number++){
+ complexInput = vld2q_f32(complexVectorPtr); /* loads 8 floats de-interleaved: val[0] = even-indexed, val[1] = odd-indexed */
+ vst1q_f32( iBufferPtr, complexInput.val[0] ); /* only the even-indexed lane set is stored; val[1] is discarded */
+ complexVectorPtr += 8; /* 4 complex values = 8 input floats consumed */
+ iBufferPtr += 4;
+ }
+ for(number = quarter_points*4; number < num_points; number++){ /* scalar tail for the values the vector loop did not cover */
+ *iBufferPtr++ = *complexVectorPtr++; /* even-indexed float -> I */
+ complexVectorPtr++; /* skip the odd-indexed (Q) float */
+ }
+}
+#endif /* LV_HAVE_NEON */
#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
- [Commit-gnuradio] [gnuradio] branch master updated (b3bbe56 -> 4869607), git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 03/22: volk: removing executable bit for volk_32f_x2_pow_32f.h, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 04/22: volk: add neon support for 32fc_s32fc_multiply_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 12/22: volk: update profile to use the new 32u_byteswap puppet, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 06/22: volk: adding popcnt puppets to qa, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 09/22: volk: add neon version for 32f_binary_slicer_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 08/22: volk: add neon protokernel for 16i_s32f_convert_32f., git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers,
git <=
- [Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 11/22: volk: add neon version of 32u_byteswap, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 10/22: volk: removed unused variable from neon binary slicer, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 01/22: volk: add neon kernel for 16i_convert_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 14/22: volk: fixing 32u_byteswap puppet for SSE, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 13/22: volk: add neon versions for 32i bitwise operators, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 22/22: Merge commit '0c92479f', git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 19/22: volk: add neon log2 implementation and fix QA to properly test, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 15/22: volk: add neon version of complex<float> dot product, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 17/22: volk: add a neon table version of 16u_byteswap, git, 2014/10/31