[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support |
Date: |
Fri, 31 Oct 2014 19:22:30 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
jcorgan pushed a commit to branch master
in repository gnuradio.
commit 068bc75e2a2254f1ea8a8607d22b38bc41eeaefd
Author: Nathan West <address@hidden>
Date: Sat Oct 18 17:57:55 2014 -0500
volk: popcnt support
Add a neon protokernel for 64-bit popcnt, and puppets so 64-bit and
32-bit versions can be tested with volk_profile
---
volk/apps/volk_profile.cc | 4 +-
volk/kernels/volk/volk_32u_popcntpuppet_32u.h | 47 ++++++++++++++++++++++
volk/kernels/volk/volk_64u_popcnt.h | 56 +++++++++++++++++++++++++++
volk/kernels/volk/volk_64u_popcntpuppet_64u.h | 47 ++++++++++++++++++++++
4 files changed, 152 insertions(+), 2 deletions(-)
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
index 5030836..e3f0ba7 100644
--- a/volk/apps/volk_profile.cc
+++ b/volk/apps/volk_profile.cc
@@ -220,12 +220,12 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_32i_s32f_convert_32f, 1e-4, 100, 204602, 10000,
&results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32i_x2_or_32i, 0, 0, 204602, 10000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_32u_byteswap, 0, 0, 204602, 2000, &results,
benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_32u_popcnt, 0, 0, 2046, 10000, &results,
benchmark_mode, kernel_regex);
+ VOLK_PUPPET_PROFILE(volk_32u_popcntpuppet_32u, volk32u_popcnt_32u, 0, 0,
2046, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_64f_convert_32f, 1e-4, 0, 204602, 10000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_64f_x2_max_64f, 1e-4, 0, 204602, 1000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_64f_x2_min_64f, 1e-4, 0, 204602, 1000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_64u_byteswap, 0, 0, 204602, 1000, &results,
benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_64u_popcnt, 0, 0, 2046, 10000, &results,
benchmark_mode, kernel_regex);
+ VOLK_PUPPET_PROFILE(volk_64u_popcntpuppet_64u, volk_64u_popcnt, 0, 0,
2046, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_8ic_deinterleave_16i_x2, 0, 0, 204602, 3000, &results,
benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 204602, 3000,
&results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_8ic_deinterleave_real_16i, 0, 256, 204602, 3000,
&results, benchmark_mode, kernel_regex);
diff --git a/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
b/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
new file mode 100644
index 0000000..056983e
--- /dev/null
+++ b/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
@@ -0,0 +1,47 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2014 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_volk_32u_popcntpuppet_32u_H
+#define INCLUDED_volk_32u_popcntpuppet_32u_H
+
+#include <stdint.h>
+#include <volk/volk_32u_popcnt.h>
+
+#ifdef LV_HAVE_GENERIC
+/*
+ * Puppet wrapper that lets the single-value kernel volk_32u_popcnt_generic
+ * be driven with vector arguments (e.g. from volk_profile).
+ *
+ * outVector:  receives one result per input element
+ * inVector:   values handed to the kernel, one per call
+ * num_points: number of elements in both vectors
+ */
+static inline void volk_32u_popcntpuppet_32u_generic(uint32_t* outVector, const uint32_t* inVector, unsigned int num_points)
+{
+    unsigned int idx = 0;
+    while(idx < num_points) {
+        /* result for inVector[idx] lands in outVector[idx] */
+        volk_32u_popcnt_generic(&outVector[idx], inVector[idx]);
+        idx++;
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+/*
+ * The guard must be spelled LV_HAVE_SSE4_2, the same spelling that guards
+ * volk_64u_popcnt_a_sse4_2 in volk_64u_popcnt.h; with any other spelling
+ * this wrapper is silently compiled out.
+ */
+#ifdef LV_HAVE_SSE4_2
+/*
+ * Puppet wrapper that lets the single-value kernel volk_32u_popcnt_a_sse4_2
+ * be driven with vector arguments (e.g. from volk_profile).
+ *
+ * outVector:  receives one result per input element
+ * inVector:   values handed to the kernel, one per call
+ * num_points: number of elements in both vectors
+ */
+static inline void volk_32u_popcntpuppet_32u_a_sse4_2(uint32_t* outVector, const uint32_t* inVector, unsigned int num_points){
+    unsigned int ii;
+    for(ii=0; ii < num_points; ++ii) {
+        volk_32u_popcnt_a_sse4_2(outVector+ii, *(inVector+ii) );
+    }
+}
+#endif /* LV_HAVE_SSE4_2 */
+
+#endif /* INCLUDED_volk_32u_popcntpuppet_32u_H */
diff --git a/volk/kernels/volk/volk_64u_popcnt.h
b/volk/kernels/volk/volk_64u_popcnt.h
index d425cd5..5eb28c7 100644
--- a/volk/kernels/volk/volk_64u_popcnt.h
+++ b/volk/kernels/volk/volk_64u_popcnt.h
@@ -71,4 +71,60 @@ static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret,
const uint64_t value)
#endif /*LV_HAVE_SSE4_2*/
+/* #ifdef (not #if) so the guard matches the one on the NEON puppet that calls this kernel. */
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*
+ * Count the set bits of a 64-bit word using NEON.
+ *
+ * ret:   receives the number of 1 bits in value
+ * value: word whose bits are counted
+ *
+ * vcnt_u8 produces a bit count for each of the eight bytes; three pairwise
+ * widening adds (8 -> 16 -> 32 -> 64 bit lanes) then fold those eight
+ * counts into a single 64-bit total.
+ */
+static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value) {
+    uint8x8_t input_val, count8x8_val;
+    uint16x4_t count16x4_val;
+    uint32x2_t count32x2_val;
+    uint64x1_t count64x1_val;
+
+    /* view the word as eight bytes; keep the const qualifier on the source */
+    input_val = vld1_u8((const uint8_t *) &value);
+    count8x8_val = vcnt_u8(input_val);
+    count16x4_val = vpaddl_u8(count8x8_val);
+    count32x2_val = vpaddl_u16(count16x4_val);
+    count64x1_val = vpaddl_u32(count32x2_val);
+    vst1_u64(ret, count64x1_val);
+}
+#endif /*LV_HAVE_NEON*/
+
#endif /*INCLUDED_volk_64u_popcnt_a_H*/
diff --git a/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
new file mode 100644
index 0000000..5837d0f
--- /dev/null
+++ b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
@@ -0,0 +1,47 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2014 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_volk_64u_popcntpuppet_64u_H
+#define INCLUDED_volk_64u_popcntpuppet_64u_H
+
+#include <stdint.h>
+#include <volk/volk_64u_popcnt.h>
+
+#ifdef LV_HAVE_GENERIC
+/*
+ * Puppet wrapper that lets the single-value kernel volk_64u_popcnt_generic
+ * be driven with vector arguments (e.g. from volk_profile).
+ *
+ * outVector:  receives one result per input element
+ * inVector:   values handed to the kernel, one per call
+ * num_points: number of elements in both vectors
+ */
+static inline void volk_64u_popcntpuppet_64u_generic(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points)
+{
+    unsigned int idx = 0;
+    while(idx < num_points) {
+        /* result for inVector[idx] lands in outVector[idx] */
+        volk_64u_popcnt_generic(&outVector[idx], inVector[idx]);
+        idx++;
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_NEON
+/*
+ * Puppet wrapper that lets the single-value kernel volk_64u_popcnt_neon
+ * be driven with vector arguments (e.g. from volk_profile).
+ *
+ * outVector:  receives one result per input element
+ * inVector:   values handed to the kernel, one per call
+ * num_points: number of elements in both vectors
+ */
+static inline void volk_64u_popcntpuppet_64u_neon(uint64_t* outVector, const uint64_t* inVector, unsigned int num_points)
+{
+    unsigned int idx = 0;
+    while(idx < num_points) {
+        /* result for inVector[idx] lands in outVector[idx] */
+        volk_64u_popcnt_neon(&outVector[idx], inVector[idx]);
+        idx++;
+    }
+}
+#endif /* LV_HAVE_NEON */
+
+#endif /* INCLUDED_volk_64u_popcntpuppet_64u_H */
- [Commit-gnuradio] [gnuradio] branch master updated (b3bbe56 -> 4869607), git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 03/22: volk: removing executable bit for volk_32f_x2_pow_32f.h, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 05/22: volk: popcnt support,
git <=
- [Commit-gnuradio] [gnuradio] 04/22: volk: add neon support for 32fc_s32fc_multiply_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 12/22: volk: update profile to use the new 32u_byteswap puppet, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 06/22: volk: adding popcnt puppets to qa, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 09/22: volk: add neon version for 32f_binary_slicer_8i, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 08/22: volk: add neon protokernel for 16i_s32f_convert_32f., git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 02/22: volk: add neon kernels for 32fc->32f deinterleavers, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 07/22: volk: add neon kernel for 16i_32fc_dot_prod_32fc, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 11/22: volk: add neon version of 32u_byteswap, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 10/22: volk: removed unused variable from neon binary slicer, git, 2014/10/31
- [Commit-gnuradio] [gnuradio] 01/22: volk: add neon kernel for 16i_convert_8i, git, 2014/10/31