[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[patch] Use fftw if available
From: |
Mumit Khan |
Subject: |
[patch] Use fftw if available |
Date: |
Sat, 21 Apr 2001 15:20:50 -0500 (CDT) |
[ I'm not subscribed to the list, so please copy me if appropriate ]
The attached patch is a first cut at enabling fftw support in Octave. I
did some quick tests, and even while the fftw interface here is suboptimal
at best, there is still significant performance boost.
Must re-run autogen, and use --with-fftw to use fftw (default is off). The
configure script will check for dfftw first and then fftw, and should do the
right thing for either case.
I'm not using wisdom in this implementation, but something we should
consider for the future. Perhaps by adding a "fftw" interface that
will allow tweaking the planner.
For 1d fft of a matrix, there may be a small speedup by switching to the
specific interface (specifying howmany and stride) instead of calling fftw
many times for each column/row vector.
Regards,
Mumit
toplevel/ChangeLog:
2001-04-21 Mumit Khan <address@hidden>
* configure.in: Support for --with-fftw.
(FFT_DIR, FFTW_LIBS): New substitutions.
* Makeconf.in (FFTW_LIBS): New variable.
* acconfig.h (HAVE_FFTW): New macro.
libcruft/ChangeLog:
2001-04-21 Mumit Khan <address@hidden>
* Makefile.in (CRUFT_DIRS): Substitute @address@hidden
liboctave/ChangeLog:
2001-04-21 Mumit Khan <address@hidden>
* oct-fftw.h, oct-fftw.cc: New files.
* Makefile.in (INCLUDES, SOURCES): Add new files.
* CMatrix.cc (ComplexMatrix::{fourier, ifourier, fourier2d,
ifourier2d}): Use fftw if available.
* dMatrix.cc (Matrix::{fourier, ifourier, fourier2d, ifourier2d}):
Likewise.
src/ChangeLog:
2001-04-21 Mumit Khan <address@hidden>
* Makefile.in (octave): Add $(FFTW_LIBS).
Index: Makeconf.in
===================================================================
RCS file: /cvs/octave/Makeconf.in,v
retrieving revision 1.111
diff -u -3 -p -r1.111 Makeconf.in
--- Makeconf.in 2001/04/19 19:50:53 1.111
+++ Makeconf.in 2001/04/21 19:58:43
@@ -155,6 +155,7 @@ LIBOCTAVE = @LIBOCTAVE@
LIBCRUFT = @LIBCRUFT@
BLAS_LIBS = @BLAS_LIBS@
+FFTW_LIBS = @FFTW_LIBS@
LIBS = @LIBS@
# The arguments passed to configure.
Index: acconfig.h
===================================================================
RCS file: /cvs/octave/acconfig.h,v
retrieving revision 1.46
diff -u -3 -p -r1.46 acconfig.h
--- acconfig.h 2001/02/07 18:46:35 1.46
+++ acconfig.h 2001/04/21 19:58:43
@@ -58,6 +58,9 @@
/* Define if you have BSD style signals. */
#undef HAVE_BSD_SIGNALS
+/* Define if you have FFTW installed. */
+#undef HAVE_FFTW
+
/* Define if you have POSIX style signals. */
#undef HAVE_POSIX_SIGNALS
Index: configure.in
===================================================================
RCS file: /cvs/octave/configure.in,v
retrieving revision 1.342
diff -u -3 -p -r1.342 configure.in
--- configure.in 2001/03/01 16:54:31 1.342
+++ configure.in 2001/04/21 19:58:43
@@ -354,6 +354,38 @@ fi
# ----------------------------------------------------------------------
+# Checks for FFTW header and library.
+
+# subdirectories of libcruft to build if they aren't found on the system:
+FFT_DIR="fftpack"
+AC_SUBST(FFT_DIR)
+
+# Installed fftw library, if any.
+FFTW_LIBS=''
+AC_SUBST(FFTW_LIBS)
+
+AC_ARG_WITH(fftw,
+ [ --with-fftw use installed fftw instead of included fftpack],
+ with_fftw=yes, with_fftw=no)
+
+if test "$with_fftw" = "yes"; then
+ AC_CHECK_HEADERS(dfftw.h fftw.h)
+ if test "$ac_cv_header_dfftw_h" = yes \
+ || test "$ac_cv_header_fftw_h" = yes; then
+ AC_CHECK_LIB(dfftw, fftw_create_plan, FFTW_LIBS="-ldfftw",
+ [AC_CHECK_LIB(fftw, fftw_create_plan, FFTW_LIBS="-lfftw", with_fftw=no)])
+ else
+ with_fftw=no
+ fi
+fi
+
+if test "$with_fftw" = yes; then
+ FFT_DIR=''
+ AC_DEFINE(HAVE_FFTW)
+fi
+
+# ----------------------------------------------------------------------
+
### We need these before trying to find libf2c.
OCTAVE_PROG_AR
Index: libcruft/Makefile.in
===================================================================
RCS file: /cvs/octave/libcruft/Makefile.in,v
retrieving revision 1.69
diff -u -3 -p -r1.69 Makefile.in
--- libcruft/Makefile.in 2001/03/27 19:12:58 1.69
+++ libcruft/Makefile.in 2001/04/21 19:58:43
@@ -28,7 +28,7 @@ INSTALL_DATA = @INSTALL_DATA@
# e.g. if they are already present on the system. For these, their
# dirname is substituted by configure and may be the empty string.
-CRUFT_DIRS = amos @BLAS_DIR@ blas-xtra dassl fftpack @LAPACK_DIR@ \
+CRUFT_DIRS = amos @BLAS_DIR@ blas-xtra dassl @FFT_DIR@ @LAPACK_DIR@ \
lapack-xtra linpack minpack misc odepack ordered-qz quadpack \
ranlib slatec-err slatec-fn villad
Index: liboctave/CMatrix.cc
===================================================================
RCS file: /cvs/octave/liboctave/CMatrix.cc,v
retrieving revision 1.77
diff -u -3 -p -r1.77 CMatrix.cc
--- liboctave/CMatrix.cc 2001/01/31 22:15:52 1.77
+++ liboctave/CMatrix.cc 2001/04/21 19:58:43
@@ -55,6 +55,10 @@ Software Foundation, 59 Temple Place - S
#include "mx-inlines.cc"
#include "oct-cmplx.h"
+#ifdef HAVE_FFTW
+# include "oct-fftw.h"
+#endif
+
// Fortran functions we call.
extern "C"
@@ -954,6 +958,8 @@ ComplexMatrix::pseudo_inverse (double to
return retval;
}
+#ifndef HAVE_FFTW
+
ComplexMatrix
ComplexMatrix::fourier (void) const
{
@@ -1153,6 +1159,102 @@ ComplexMatrix::ifourier2d (void) const
return retval;
}
+
+#else /* ! HAVE_FFTW */
+
+ComplexMatrix
+ComplexMatrix::fourier (void) const
+{
+ size_t nr = rows ();
+ size_t nc = cols ();
+
+ ComplexMatrix retval (nr, nc);
+
+ size_t npts, nsamples;
+
+ if (nr == 1 || nc == 1)
+ {
+ npts = nr > nc ? nr : nc;
+ nsamples = 1;
+ }
+ else
+ {
+ npts = nr;
+ nsamples = nc;
+ }
+
+ const Complex *in (data ());
+ Complex *out (retval.fortran_vec ());
+
+ for (size_t i = 0; i < nsamples; i++)
+ {
+ octave_fftw::fft (&in[npts * i], &out[npts * i], npts);
+ }
+
+ return retval;
+}
+
+ComplexMatrix
+ComplexMatrix::ifourier (void) const
+{
+ size_t nr = rows ();
+ size_t nc = cols ();
+
+ ComplexMatrix retval (nr, nc);
+
+ size_t npts, nsamples;
+
+ if (nr == 1 || nc == 1)
+ {
+ npts = nr > nc ? nr : nc;
+ nsamples = 1;
+ }
+ else
+ {
+ npts = nr;
+ nsamples = nc;
+ }
+
+ const Complex *in (data ());
+ Complex *out (retval.fortran_vec ());
+
+ for (size_t i = 0; i < nsamples; i++)
+ {
+ octave_fftw::ifft (&in[npts * i], &out[npts * i], npts);
+ }
+
+ return retval;
+}
+
+ComplexMatrix
+ComplexMatrix::fourier2d (void) const
+{
+ int nr = rows ();
+ int nc = cols ();
+
+ ComplexMatrix retval (*this);
+ // Note the order of passing the rows and columns to account for
+ // column-major storage.
+ octave_fftw::fft2d (retval.fortran_vec (), nc, nr);
+
+ return retval;
+}
+
+ComplexMatrix
+ComplexMatrix::ifourier2d (void) const
+{
+ int nr = rows ();
+ int nc = cols ();
+
+ ComplexMatrix retval (*this);
+ // Note the order of passing the rows and columns to account for
+ // column-major storage.
+ octave_fftw::ifft2d (retval.fortran_vec (), nc, nr);
+
+ return retval;
+}
+
+#endif /* ! HAVE_FFTW */
ComplexDET
ComplexMatrix::determinant (void) const
Index: liboctave/Makefile.in
===================================================================
RCS file: /cvs/octave/liboctave/Makefile.in,v
retrieving revision 1.136
diff -u -3 -p -r1.136 Makefile.in
--- liboctave/Makefile.in 2001/02/06 01:57:01 1.136
+++ liboctave/Makefile.in 2001/04/21 19:58:43
@@ -51,9 +51,9 @@ INCLUDES := Bounds.h CollocWt.h DAE.h DA
dir-ops.h file-ops.h file-stat.h getopt.h glob-match.h \
idx-vector.h lo-ieee.h lo-mappers.h lo-specfun.h lo-sysdep.h \
lo-utils.h mach-info.h oct-alloc.h oct-cmplx.h oct-env.h \
- oct-getopt.h oct-group.h oct-kpse.h oct-passwd.h oct-rl-edit.h \
- oct-rl-hist.h oct-shlib.h oct-syscalls.h oct-time.h pathlen.h \
- pathsearch.h prog-args.h statdefs.h str-vec.h sun-utils.h \
+ oct-fftw.h oct-getopt.h oct-group.h oct-kpse.h oct-passwd.h \
+ oct-rl-edit.h oct-rl-hist.h oct-shlib.h oct-syscalls.h oct-time.h \
+ pathlen.h pathsearch.h prog-args.h statdefs.h str-vec.h sun-utils.h \
sysdir.h systime.h syswait.h \
$(MATRIX_INC) \
$(MX_OP_INC) \
@@ -90,10 +90,10 @@ SOURCES := Bounds.cc CollocWt.cc DAE.cc
file-stat.cc filemode.c getopt.c getopt1.c glob-match.cc \
idx-vector.cc lo-cutils.c lo-ieee.cc lo-mappers.cc lo-specfun.cc \
lo-sysdep.cc lo-utils.cc mach-info.cc mkdir.c oct-alloc.cc \
- oct-env.cc oct-getopt.c oct-group.cc oct-kpse.c oct-passwd.cc \
- oct-rl-edit.c oct-rl-hist.c oct-shlib.cc oct-syscalls.cc \
- oct-time.cc pathsearch.cc prog-args.cc rename.c rmdir.c \
- strftime.c strptime.c str-vec.cc tempname.c tempnam.c \
+ oct-env.cc oct-fftw.cc oct-getopt.c oct-group.cc oct-kpse.c \
+ oct-passwd.cc oct-rl-edit.c oct-rl-hist.c oct-shlib.cc \
+ oct-syscalls.cc oct-time.cc pathsearch.cc prog-args.cc rename.c \
+ rmdir.c strftime.c strptime.c str-vec.cc tempname.c tempnam.c \
$(TEMPLATE_SRC) \
$(TI_SRC) \
$(MATRIX_SRC) \
Index: liboctave/dMatrix.cc
===================================================================
RCS file: /cvs/octave/liboctave/dMatrix.cc,v
retrieving revision 1.81
diff -u -3 -p -r1.81 dMatrix.cc
--- liboctave/dMatrix.cc 2001/01/31 22:15:53 1.81
+++ liboctave/dMatrix.cc 2001/04/21 19:58:43
@@ -50,6 +50,10 @@ Software Foundation, 59 Temple Place - S
#include "mx-inlines.cc"
#include "oct-cmplx.h"
+#ifdef HAVE_FFTW
+# include "oct-fftw.h"
+#endif
+
// Fortran functions we call.
extern "C"
@@ -648,6 +652,8 @@ Matrix::pseudo_inverse (double tol)
}
}
+#ifndef HAVE_FFTW
+
ComplexMatrix
Matrix::fourier (void) const
{
@@ -848,6 +854,103 @@ Matrix::ifourier2d (void) const
return retval;
}
+#else /* ! HAVE_FFTW */
+
+ComplexMatrix
+Matrix::fourier (void) const
+{
+ size_t nr = rows ();
+ size_t nc = cols ();
+
+ ComplexMatrix retval (nr, nc);
+
+ size_t npts, nsamples;
+
+ if (nr == 1 || nc == 1)
+ {
+ npts = nr > nc ? nr : nc;
+ nsamples = 1;
+ }
+ else
+ {
+ npts = nr;
+ nsamples = nc;
+ }
+
+ ComplexMatrix tmp (*this);
+ Complex *in (tmp.fortran_vec ());
+ Complex *out (retval.fortran_vec ());
+
+ for (size_t i = 0; i < nsamples; i++)
+ {
+ octave_fftw::fft (&in[npts * i], &out[npts * i], npts);
+ }
+
+ return retval;
+}
+
+ComplexMatrix
+Matrix::ifourier (void) const
+{
+ size_t nr = rows ();
+ size_t nc = cols ();
+
+ ComplexMatrix retval (nr, nc);
+
+ size_t npts, nsamples;
+
+ if (nr == 1 || nc == 1)
+ {
+ npts = nr > nc ? nr : nc;
+ nsamples = 1;
+ }
+ else
+ {
+ npts = nr;
+ nsamples = nc;
+ }
+
+ ComplexMatrix tmp (*this);
+ Complex *in (tmp.fortran_vec ());
+ Complex *out (retval.fortran_vec ());
+
+ for (size_t i = 0; i < nsamples; i++)
+ {
+ octave_fftw::ifft (&in[npts * i], &out[npts * i], npts);
+ }
+
+ return retval;
+}
+
+ComplexMatrix
+Matrix::fourier2d (void) const
+{
+ int nr = rows ();
+ int nc = cols ();
+
+ ComplexMatrix retval (*this);
+ // Note the order of passing the rows and columns to account for
+ // column-major storage.
+ octave_fftw::fft2d (retval.fortran_vec (), nc, nr);
+
+ return retval;
+}
+
+ComplexMatrix
+Matrix::ifourier2d (void) const
+{
+ int nr = rows ();
+ int nc = cols ();
+
+ ComplexMatrix retval (*this);
+ // Note the order of passing the rows and columns to account for
+ // column-major storage.
+ octave_fftw::ifft2d (retval.fortran_vec (), nc, nr);
+
+ return retval;
+}
+
+#endif /* ! HAVE_FFTW */
DET
Matrix::determinant (void) const
{
Index: src/Makefile.in
===================================================================
RCS file: /cvs/octave/src/Makefile.in,v
retrieving revision 1.260
diff -u -3 -p -r1.260 Makefile.in
--- src/Makefile.in 2001/03/01 16:54:32 1.260
+++ src/Makefile.in 2001/04/21 19:58:44
@@ -259,7 +259,7 @@ octave: stamp-prereq $(LIBRARIES) stamp-
octave.o builtins.o ops.o $(XERBLA) $(DLD_STATIC_OBJ) \
$(OCTAVE_LFLAGS) \
$(OCTAVE_LIBS) \
- $(LEXLIB) $(TERMLIBS) $(BLAS_LIBS) $(LIBS) $(FLIBS)
+ $(LEXLIB) $(TERMLIBS) $(BLAS_LIBS) $(FFTW_LIBS) $(LIBS) $(FLIBS)
stmp-pic: pic
@if [ -f stmp-pic ]; then \
--- liboctave/oct-fftw.h.orig Sat Apr 21 14:58:31 2001
+++ liboctave/oct-fftw.h Sat Apr 21 11:39:07 2001
@@ -0,0 +1,56 @@
+/*
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+*/
+
+#if !defined (octave_oct_fftw_h)
+#define octave_oct_fftw_h 1
+
+#include <cstddef>
+#ifdef HAVE_DFFTW_H
+# include <dfftw.h>
+#else
+# include <fftw.h>
+#endif
+
+#include "oct-cmplx.h"
+
+class
+octave_fftw
+{
+public:
+ static int fft (const Complex*, Complex *, size_t);
+ static int ifft (const Complex*, Complex *, size_t);
+
+ static int fft2d (Complex*, size_t, size_t);
+ static int ifft2d (Complex*, size_t, size_t);
+
+private:
+ octave_fftw ();
+ octave_fftw (const octave_fftw&);
+ octave_fftw& operator = (const octave_fftw&);
+};
+
+#endif/*octave_oct_fftw_h*/
+
+/*
+;;; Local Variables: ***
+;;; mode: C++ ***
+;;; End: ***
+*/
+
--- liboctave/oct-fftw.cc.orig Sat Apr 21 14:58:33 2001
+++ liboctave/oct-fftw.cc Sat Apr 21 12:40:31 2001
@@ -0,0 +1,183 @@
+/*
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#ifdef HAVE_FFTW
+
+#include "oct-fftw.h"
+#include "lo-error.h"
+
+//
+// Helper class to create and cache fftw plans for both 1d and 2d. This
+// implementation uses FFTW_ESTIMATE to create the plans, which in theory
+// is suboptimal, but provides quite reasonable performance. Future
+// enhancement will be to add a dynamically loadable interface ("fftw")
+// to manipulate fftw wisdom so that users may choose the appropriate
+// planner.
+//
+class
+octave_fftw_planner
+{
+public:
+ octave_fftw_planner ();
+
+ fftw_plan create_plan (fftw_direction, size_t);
+ fftwnd_plan create_plan2d (fftw_direction, size_t, size_t);
+
+private:
+ int plan_flags;
+
+ fftw_plan plan[2];
+ fftwnd_plan plan2d[2];
+
+ size_t n[2];
+ size_t n2d[2][2];
+};
+
+octave_fftw_planner::octave_fftw_planner ()
+{
+ plan_flags = FFTW_ESTIMATE;
+
+ plan[0] = plan[1] = 0;
+ plan2d[0] = plan2d[1] = 0;
+
+ n[0] = n[1] = 0;
+ n2d[0][0] = n2d[0][1] = n2d[1][0] = n2d[1][1] = 0;
+}
+
+fftw_plan
+octave_fftw_planner::create_plan (fftw_direction dir, size_t npts)
+{
+ size_t which = (dir == FFTW_FORWARD) ? 0 : 1;
+ fftw_plan *cur_plan_p = &plan[which];
+ bool create_new_plan = false;
+
+ if (plan[which] == 0 || n[which] != npts)
+ {
+ create_new_plan = true;
+ n[which] = npts;
+ }
+
+ if (create_new_plan)
+ {
+ if (*cur_plan_p)
+ fftw_destroy_plan (*cur_plan_p);
+ *cur_plan_p = fftw_create_plan (npts, dir, plan_flags);
+ if (*cur_plan_p == 0)
+ (*current_liboctave_error_handler) ("Error creating fftw plan");
+ }
+
+ return *cur_plan_p;
+}
+
+fftwnd_plan
+octave_fftw_planner::create_plan2d (fftw_direction dir,
+ size_t nrows, size_t ncols)
+{
+ size_t which = (dir == FFTW_FORWARD) ? 0 : 1;
+ fftwnd_plan *cur_plan_p = &plan2d[which];
+ bool create_new_plan = false;
+
+ if (plan2d[which] == 0 || n2d[which][0] != nrows || n2d[which][1] != ncols)
+ {
+ create_new_plan = true;
+ n2d[which][0] = nrows;
+ n2d[which][1] = ncols;
+ }
+
+ if (create_new_plan)
+ {
+ if (*cur_plan_p)
+ fftwnd_destroy_plan (*cur_plan_p);
+ *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir,
+ plan_flags | FFTW_IN_PLACE);
+ if (*cur_plan_p == 0)
+ (*current_liboctave_error_handler) ("Error creating 2d fftw plan");
+ }
+
+ return *cur_plan_p;
+}
+
+static octave_fftw_planner fftw_planner;
+
+int
+octave_fftw::fft (const Complex *in, Complex *out, size_t npts)
+{
+ fftw_one (fftw_planner.create_plan (FFTW_FORWARD, npts),
+ reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
+ reinterpret_cast<fftw_complex *> (out));
+
+ return 0;
+}
+
+int
+octave_fftw::ifft (const Complex *in, Complex *out, size_t npts)
+{
+ fftw_one (fftw_planner.create_plan (FFTW_BACKWARD, npts),
+ reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
+ reinterpret_cast<fftw_complex *> (out));
+
+ const Complex scale = npts;
+ for (size_t i = 0; i < npts; i++)
+ out[i] /= scale;
+
+ return 0;
+}
+
+int
+octave_fftw::fft2d (Complex *inout, size_t nr, size_t nc)
+{
+ fftwnd_one (fftw_planner.create_plan2d (FFTW_FORWARD, nr, nc),
+ reinterpret_cast<fftw_complex *> (inout),
+ NULL);
+
+ return 0;
+}
+
+int
+octave_fftw::ifft2d (Complex *inout, size_t nr, size_t nc)
+{
+ fftwnd_one (fftw_planner.create_plan2d (FFTW_BACKWARD, nr, nc),
+ reinterpret_cast<fftw_complex *> (inout),
+ NULL);
+
+ const size_t npts = nr * nc;
+ const Complex scale = npts;
+ for (size_t i = 0; i < npts; i++)
+ inout[i] /= scale;
+
+ return 0;
+}
+
+#else /* HAVE_FFTW */
+
+extern int oct_empty_translation_unit;
+
+#endif /* HAVE_FFTW */
+
+/*
+;;; Local Variables: ***
+;;; mode: C++ ***
+;;; End: ***
+*/
+
- [patch] Use fftw if available,
Mumit Khan <=