[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 11/22: volk: add neon version of 32u_bytesw
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 11/22: volk: add neon version of 32u_byteswap |
Date: |
Fri, 31 Oct 2014 19:22:30 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
jcorgan pushed a commit to branch master
in repository gnuradio.
commit f2bc76cc65ffba51a141950f98e75364e49df874
Author: Nathan West <address@hidden>
Date: Mon Oct 20 19:05:11 2014 -0500
volk: add neon version of 32u_byteswap
The QA needed a puppet because qa_utils only checks output buffers,
so this also introduces a puppet
---
volk/kernels/volk/volk_32u_byteswap.h | 55 ++++++++++++++++++++++++-
volk/kernels/volk/volk_32u_byteswappuppet_32u.h | 45 ++++++++++++++++++++
2 files changed, 98 insertions(+), 2 deletions(-)
diff --git a/volk/kernels/volk/volk_32u_byteswap.h
b/volk/kernels/volk/volk_32u_byteswap.h
index 74d9a0b..0194efc 100644
--- a/volk/kernels/volk/volk_32u_byteswap.h
+++ b/volk/kernels/volk/volk_32u_byteswap.h
@@ -73,6 +73,59 @@ static inline void volk_32u_byteswap_u_sse2(uint32_t*
intsToSwap, unsigned int n
}
#endif /* LV_HAVE_SSE2 */
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+ \brief Byteswaps (in-place) an aligned vector of int32_t's.
+ \param intsToSwap The vector of data to byte swap
+ \param numDataPoints The number of data points
+*/
+static inline void volk_32u_byteswap_neon(uint32_t* intsToSwap, unsigned int
num_points){
+ uint32_t* inputPtr = intsToSwap;
+ unsigned int number = 0;
+ unsigned int n8points = num_points / 8;
+
+ uint8x8x4_t input_table;
+ uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
+ uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
+
+ /* these magic numbers are used as byte-indeces in the LUT.
+ they are pre-computed to save time. A simple C program
+ can calculate them; for example for lookup01:
+ uint8_t chars[8] = {24, 16, 8, 0, 25, 17, 9, 1};
+ for(ii=0; ii < 8; ++ii) {
+ index += ((uint64_t)(*(chars+ii))) << (ii*8);
+ }
+ */
+ int_lookup01 = vcreate_u8(74609667900706840);
+ int_lookup23 = vcreate_u8(219290013576860186);
+ int_lookup45 = vcreate_u8(363970359253013532);
+ int_lookup67 = vcreate_u8(508650704929166878);
+
+ for(number = 0; number < n8points; ++number){
+ input_table = vld4_u8((uint8_t*) inputPtr);
+ swapped_int01 = vtbl4_u8(input_table, int_lookup01);
+ swapped_int23 = vtbl4_u8(input_table, int_lookup23);
+ swapped_int45 = vtbl4_u8(input_table, int_lookup45);
+ swapped_int67 = vtbl4_u8(input_table, int_lookup67);
+ vst1_u8((uint8_t*) inputPtr, swapped_int01);
+ vst1_u8((uint8_t*) (inputPtr+2), swapped_int23);
+ vst1_u8((uint8_t*) (inputPtr+4), swapped_int45);
+ vst1_u8((uint8_t*) (inputPtr+6), swapped_int67);
+
+ inputPtr += 8;
+ }
+
+ for(number = n8points * 8; number < num_points; ++number){
+ uint32_t output = *inputPtr;
+ output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) |
((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));
+
+ *inputPtr = output;
+ inputPtr++;
+ }
+}
+#endif /* LV_HAVE_NEON */
+
#ifdef LV_HAVE_GENERIC
/*!
\brief Byteswaps (in-place) an aligned vector of int32_t's.
@@ -94,8 +147,6 @@ static inline void volk_32u_byteswap_generic(uint32_t*
intsToSwap, unsigned int
#endif /* LV_HAVE_GENERIC */
-
-
#endif /* INCLUDED_volk_32u_byteswap_u_H */
#ifndef INCLUDED_volk_32u_byteswap_a_H
#define INCLUDED_volk_32u_byteswap_a_H
diff --git a/volk/kernels/volk/volk_32u_byteswappuppet_32u.h
b/volk/kernels/volk/volk_32u_byteswappuppet_32u.h
new file mode 100644
index 0000000..e50ad1d
--- /dev/null
+++ b/volk/kernels/volk/volk_32u_byteswappuppet_32u.h
@@ -0,0 +1,45 @@
+#ifndef INCLUDED_volk_32u_byteswappuppet_32u_H
+#define INCLUDED_volk_32u_byteswappuppet_32u_H
+
+
+#include <stdint.h>
+#include <volk/volk_32u_byteswap.h>
+
+#ifdef LV_HAVE_GENERIC
+static inline void volk_32u_byteswappuppet_32u_generic(uint32_t*output,
uint32_t* intsToSwap, unsigned int num_points){
+
+ volk_32u_byteswap_generic((uint32_t*)intsToSwap, num_points);
+ memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
+
+}
+#endif
+
+#ifdef LV_HAVE_NEON
+static inline void volk_32u_byteswappuppet_32u_neon(uint32_t*output, uint32_t*
intsToSwap, unsigned int num_points){
+
+ volk_32u_byteswap_neon((uint32_t*)intsToSwap, num_points);
+ memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
+
+}
+#endif
+
+#ifdef LV_HAVE_SSE2
+static inline void volk_32u_byteswappuppet_u_sse2(uint32_t* intsToSwap,
unsigned int num_points){
+
+ volk_32u_byteswap_u_sse2((uint32_t*)intsToSwap, num_points);
+ memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
+
+}
+#endif
+
+#ifdef LV_HAVE_SSE2
+static inline void volk_32u_byteswappuppet_a_sse2(uint32_t* intsToSwap,
unsigned int num_points){
+
+ volk_32u_byteswap_a_sse2((uint32_t*)intsToSwap, num_points);
+ memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
+
+}
+#endif
+
+
+#endif
- [Commit-gnuradio] [gnuradio] branch master updated (b3bbe56 -> 4869607), git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 03/22: volk: removing executable bit for volk_32f_x2_pow_32f.h, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 04/22: volk: add neon support for 32fc_s32fc_multiply_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 12/22: volk: update profile to use the new 32u_byteswap puppet, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 06/22: volk: adding popcnt puppets to qa, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 09/22: volk: add neon version for 32f_binary_slicer_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 08/22: volk: add neon protokernel for 16i_s32f_convert_32f., git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 11/22: volk: add neon version of 32u_byteswap,
git <=
- [Commit-gnuradio] [gnuradio] 10/22: volk: removed unused variable from neon binary slicer, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 01/22: volk: add neon kernel for 16i_convert_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 14/22: volk: fixing 32u_byteswap puppet for SSE, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 13/22: volk: add neon versions for 32i bitwise operators, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 22/22: Merge commit '0c92479f', git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 19/22: volk: add neon log2 implementation and fix QA to properly test, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 15/22: volk: add neon version of complex<float> dot product, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 17/22: volk: add a neon table version of 16u_byteswap, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 21/22: volk: relax log2 qa constraints and use a higher order polynomial, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 20/22: volk: fix memory overrun/corruption in neon binary_slicer_8i, git, 2014/10/31