# HG changeset patch
# User Jaroslav Hajek
# Date 1205416768 -3600
# Node ID 7313781aec60a22305538af18ecc7e087d03c286
# Parent 84122fb29c754281b397edf119de16fd79848943
generic sequential binary lookup for liboctave
diff -r 84122fb29c75 -r 7313781aec60 liboctave/ChangeLog
--- a/liboctave/ChangeLog Wed Mar 12 21:17:07 2008 -0400
+++ b/liboctave/ChangeLog Thu Mar 13 14:59:28 2008 +0100
@@ -1,3 +1,7 @@ 2008-03-08 John W. Eaton
+
+ * oct-lookup.h: New source.
+
2008-03-08 John W. Eaton
* Sparse.cc (Sparse::index, assign): Likewise.
diff -r 84122fb29c75 -r 7313781aec60 liboctave/oct-lookup.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/oct-lookup.h Thu Mar 13 14:59:28 2008 +0100
@@ -0,0 +1,318 @@
+/*
+Copyright (C) 2008 VZLU Prague, a.s., Czech Republic
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING. If not, see
+.
+
+*/
+
+// Author: Jaroslav Hajek
+
+#if !defined (octave_oct_lookup)
+#define octave_oct_lookup 1
+
+#include "oct-types.h"
+
+// generic sequential binary lookup
+// design goals:
+// 1. optimize for the case of vals array larger than table but with
+// few monotonicity switches.
+// 2. use templates to allow generic comparisons
+// 3. keep in mind the performance of the numeric cases
+// 4. decreasing & increasing table should be equally efficient and
+// handled by the Comparator class
+//
+// outline of the algorithm:
+// 1. start with full range
+// 2. determine interval enclosing the next value by binary search
+// 3. fetch values while they stay inside the interval
+// if escaped to the left, go to 4. if escaped to the right,
+// go to 5. If processed all values, return.
+// 4. enclose the next value by binary bracketing to the left.
+// if just one step was taken (simple shift), go to 3.
+// otherwise, go to 2.
+// 5. enclose the next value by binary bracketing to the right.
+// if just one step was taken (simple shift), go to 3.
+// otherwise, go to 2.
+//
+// note: in the case of dense downsampling, the algorithm will
+// often skip 2 and perform just one step in steps 4 and 5,
+// while spending most of time in step 3.
+
+// the comparator class is expected to provide:
+// Comparator::lt (const T&, const T&) - strict comparison, i.e. <
+// Comparator::le (const T&, const T&) - non-strict comparison, i.e. <
+//
+template
+void do_bin_seq_lookup (const T* begin, const T* end,
+ const T* vals, octave_idx_type nvals,
+ octave_idx_type* idx,
+ Comparator C)
+{
+ // these are "global" pointers (i.e. used by all four blocks)
+ const T *first = begin, *last = end;
+
+ // the trivial case of empty array. Store all zeros and return.
+ if (begin == end)
+ {
+ for (;nvals;--nvals)
+ *(idx++) = 0;
+ return;
+ }
+
+ // initialize
+ last = end;
+ first = begin;
+
+ // perform a binary search in [first,last)
+bin_search:
+ {
+ octave_idx_type dist = last - first, half;
+
+ while (dist > 0)
+ {
+ half = dist >> 1;
+ last = first + half;
+ if (C.lt (*last, *vals))
+ {
+ first = last + 1;
+ dist -= (half + 1);
+ }
+ else
+ dist = half;
+ }
+ last = first;
+ }
+
+ // fetch values as long as they stay in the current interval.
+ // once they get out, jump to the appropriate bracketing block.
+store_cur:
+ {
+ octave_idx_type cur;
+ cur = last - begin;
+
+ if (last == begin)
+ {
+ // leftmost interval (can't escape backwards)
+ while (nvals)
+ {
+ if (C.le (*last, *vals))
+ goto search_forwards;
+
+ *(idx++) = cur;
+ nvals--; vals++;
+ }
+ return;
+ }
+ else if (last == end)
+ {
+ // rightmost interval (can't escape forwards)
+ first = last - 1;
+ while (nvals)
+ {
+ if (C.lt (*vals, *first))
+ goto search_backwards;
+
+ *(idx++) = cur;
+ nvals--; vals++;
+ }
+ return;
+ }
+ else
+ {
+ // inner interval (can escape both ways)
+ first = last - 1;
+ while (nvals)
+ {
+ if (C.le (*last, *vals))
+ goto search_forwards;
+
+ if (C.lt (*vals, *first))
+ goto search_backwards;
+
+ *(idx++) = cur;
+ nvals--; vals++;
+ }
+ return;
+ }
+
+ }
+
+ // the current value got to the right of current interval.
+ // perform binary bracketing to the right
+search_forwards:
+ {
+ octave_idx_type dist = 1, max = end - last;
+ while (true)
+ {
+ first = last;
+ if (dist < max)
+ last += dist;
+ else
+ {
+ last = end;
+ break;
+ }
+ if (C.lt (*vals, *last))
+ break;
+ max -= dist;
+ dist <<= 1;
+ }
+
+ if (! (--dist))
+ goto store_cur; // a shortcut. If dist was 1, bin_search is not needed.
+ else
+ goto bin_search;
+ }
+
+ // the current value got to the left of current interval.
+ // perform binary bracketing to the left.
+search_backwards:
+ {
+ octave_idx_type dist = 1, max = first - begin;
+ while (true)
+ {
+ last = first;
+ if (dist < max)
+ first -= dist;
+ else
+ {
+ first = begin;
+ break;
+ }
+ if (C.le (*first, *vals))
+ break;
+ max -= dist;
+ dist <<= 1;
+ }
+
+ if (! (--dist))
+ goto store_cur; // a shortcut. If dist was 1, bin_search is not needed.
+ else
+ goto bin_search;
+ }
+
+}
+
+
+// the default comparators: for types ordered by the < operator
+template
+class comparator
+{
+public:
+ class ascending
+ {
+ public:
+ static bool lt (const T& a, const T& b) { return a < b; }
+ static bool le (const T& a, const T& b) { return ! (b < a); }
+ };
+
+ class descending
+ {
+ public:
+ static bool lt (const T& a, const T& b) { return b < a; }
+ static bool le (const T& a, const T& b) { return ! (a < b); }
+ };
+};
+
+// specialize non-strict equality for reals (to facilitate optimization).
+template<>
+bool comparator::ascending::le (const double& a, const double& b)
+{
+ return a <= b;
+}
+
+template<>
+bool comparator::descending::le(const double& a, const double& b)
+{
+ return a >= b;
+}
+
+// comparators for types ordered by an user function
+
+template
+class comparator_ufunc
+{
+public:
+ typedef bool ufunc_type (const T&, const T&);
+
+ class ascending
+ {
+ ufunc_type& ufunc;
+ public:
+ ascending(ufunc_type& F) : ufunc(F) {}
+ bool lt (const T& a, const T& b) { return ufunc (a, b); }
+ bool le (const T& a, const T& b) { return ! ufunc (b, a); }
+ };
+
+ class descending
+ {
+ ufunc_type& ufunc;
+ public:
+ descending(ufunc_type& F) : ufunc(F) {}
+ bool lt (const T& a, const T& b) { return ufunc (b, a); }
+ bool le (const T& a, const T& b) { return ! ufunc (a, b); }
+ };
+};
+
+// the basic sequential binary lookup
+template
+void octave_lookup (const T* table, octave_idx_type size,
+ const T* vals, octave_idx_type nvals, octave_idx_type *idx,
+ bool desc)
+{
+ static typename comparator::ascending comp_asc;
+ static typename comparator::descending comp_desc;
+
+ if (size >= 0)
+ if (desc)
+ do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_desc);
+ else
+ do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_asc);
+}
+
+// the basic sequential binary lookup with user comparison
+template
+void octave_lookup (const T* table, octave_idx_type size,
+ const T* vals, octave_idx_type nvals, octave_idx_type *idx,
+ typename comparator_ufunc::ufunc_type& ufunc, bool desc)
+{
+ typename comparator_ufunc::ascending comp_asc (ufunc);
+ typename comparator_ufunc::descending comp_desc (ufunc);
+
+ if (size >= 0)
+ if (desc)
+ do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_desc);
+ else
+ do_bin_seq_lookup (table, table + size, vals, nvals, idx, comp_asc);
+}
+
+
+// helper functions - determine whether an array is descending
+template
+bool check_descending (const T* table, octave_idx_type size)
+{
+ return size > 1 && table[size-1] < table[0];
+}
+
+template
+bool check_descending (const T* table, octave_idx_type size,
+ typename comparator_ufunc::ufunc_type& ufunc)
+{
+ return size > 1 && ufunc (table[size-1], table[0]);
+}
+
+#endif
diff -r 84122fb29c75 -r 7313781aec60 scripts/ChangeLog
--- a/scripts/ChangeLog Wed Mar 12 21:17:07 2008 -0400
+++ b/scripts/ChangeLog Thu Mar 13 14:59:28 2008 +0100
@@ -1,3 +1,8 @@ 2008-03-12 David Bateman
+
+ * general/lookup.m: removed (lookup moved to DLD-FUNCTIONS).
+ * general/Makefile.in: deleted reference to lookup.m
+
2008-03-12 David Bateman
* geometry/griddata3.m: Use griddatan and not griddata
diff -r 84122fb29c75 -r 7313781aec60 scripts/general/Makefile.in
--- a/scripts/general/Makefile.in Wed Mar 12 21:17:07 2008 -0400
+++ b/scripts/general/Makefile.in Thu Mar 13 14:59:28 2008 +0100
@@ -40,7 +40,7 @@ SOURCES = __isequal__.m __splinen__.m ac
interp2.m interp3.m interpn.m interpft.m is_duplicate_entry.m isa.m \
isdefinite.m isdir.m isequal.m isequalwithequalnans.m \
isscalar.m issquare.m issymmetric.m isvector.m logical.m logspace.m \
- lookup.m mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \
+ mod.m nargchk.m nextpow2.m nthroot.m num2str.m perror.m \
pol2cart.m polyarea.m postpad.m prepad.m quadl.m randperm.m rat.m rem.m \
repmat.m rot90.m rotdim.m shift.m shiftdim.m sortrows.m \
sph2cart.m strerror.m structfun.m sub2ind.m trapz.m tril.m triu.m
diff -r 84122fb29c75 -r 7313781aec60 scripts/general/lookup.m
--- a/scripts/general/lookup.m Wed Mar 12 21:17:07 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-## Copyright (C) 2000, 2006, 2007 Paul Kienzle
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING. If not, see
-## .
-
-## -*- texinfo -*-
-## @deftypefn {Function File} address@hidden =} lookup (@var{table}, @var{y})
-## Lookup values in a sorted table. Usually used as a prelude to
-## interpolation.
-##
-## If table is strictly increasing and @code{idx = lookup (table, y)}, then
-## @code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}
-## within the table. If @code{y(i)} is before the table, then
-## @code{idx(i)} is 0. If @code{y(i)} is after the table then
-## @code{idx(i)} is @code{table(n)}.
-##
-## If the table is strictly decreasing, then the tests are reversed.
-## There are no guarantees for tables which are non-monotonic or are not
-## strictly monotonic.
-##
-## To get an index value which lies within an interval of the table,
-## use:
-##
-## @example
-## idx = lookup (table(2:length(table)-1), y) + 1
-## @end example
-##
-## @noindent
-## This expression puts values before the table into the first
-## interval, and values after the table into the last interval.
-## @end deftypefn
-
-## Changed from binary search to sort.
-## Thanks to Kai Habel for the suggestion.
-
-## TODO: sort-based lookup is significantly slower given a large table
-## TODO: and small lookup vector. This shouldn't be a problem since
-## TODO: interpolation (the reason for the table lookup in the first
-## TODO: place) usually involves subsampling of an existing table. The
-## TODO: other use of interpolation, looking up values one at a time, is
-## TODO: unfortunately significantly slower for large tables.
-## TODO: sort is order O((lt+lx)*log(lt+lx))
-## TODO: search is order O(lx*log(lt))
-## TODO: Clearly, search is asymptotically better than sort, but sort
-## TODO: is compiled and search is not. Could support both, or recode
-## TODO: search in C++, or assume things are good enough as they stand.
-
-function idx = lookup (table, xi)
- if (nargin == 2)
- if (isempty (table))
- idx = zeros (size (xi));
- elseif (isvector (table))
- [nr, nc] = size (xi);
- lt = length (table);
- if (table(1) > table(lt))
- ## decreasing table
- [v, p] = sort ([xi(:); table(:)]);
- idx(p) = cumsum (p > nr*nc);
- idx = lt - idx(1:nr*nc);
- else
- ## increasing table
- [v, p] = sort ([table(:); xi(:) ]);
- idx(p) = cumsum (p <= lt);
- idx = idx(lt+1:lt+nr*nc);
- endif
- idx = reshape (idx, nr, nc);
- else
- error ("lookup: table must be a vector");
- endif
- else
- print_usage ();
- endif
-endfunction
-
-%!assert (lookup(1:3, 0.5), 0) # value before table
-%!assert (lookup(1:3, 3.5), 3) # value after table error
-%!assert (lookup(1:3, 1.5), 1) # value within table error
-%!assert (lookup(1:3, [3,2,1]), [3,2,1])
-%!assert (lookup([1:4]', [1.2, 3.5]'), [1, 3]');
-%!assert (lookup([1:4], [1.2, 3.5]'), [1, 3]');
-%!assert (lookup([1:4]', [1.2, 3.5]), [1, 3]);
-%!assert (lookup([1:4], [1.2, 3.5]), [1, 3]);
-%!assert (lookup(1:3, [3, 2, 1]), [3, 2, 1]);
-%!assert (lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
-%!assert (isempty(lookup([1:3], [])))
-%!assert (isempty(lookup([1:3]', [])))
-%!assert (lookup(1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0]);
diff -r 84122fb29c75 -r 7313781aec60 src/ChangeLog
--- a/src/ChangeLog Wed Mar 12 21:17:07 2008 -0400
+++ b/src/ChangeLog Thu Mar 13 14:59:28 2008 +0100
@@ -1,3 +1,7 @@ 2008-03-12 John W. Eaton
+
+ * DLD-FUNCTIONS/lookup.cc (Flookup): add new function.
+
2008-03-12 John W. Eaton
* variables.cc (Vwhos_line_format): Omit print_dims parameter.
diff -r 84122fb29c75 -r 7313781aec60 src/DLD-FUNCTIONS/lookup.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/DLD-FUNCTIONS/lookup.cc Thu Mar 13 14:59:28 2008 +0100
@@ -0,0 +1,213 @@
+/*
+
+Copyright (C) 2008 VZLU Prague a.s., Czech Republic
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+Octave is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING. If not, see
+.
+
+*/
+
+// Author: Jaroslav Hajek
+
+
+#ifdef HAVE_CONFIG_H
+#include
+#endif
+
+#include "dNDArray.h"
+#include "CNDArray.h"
+#include "oct-lookup.h"
+
+#include "Cell.h"
+#include "defun-dld.h"
+#include "error.h"
+#include "gripes.h"
+#include "oct-obj.h"
+#include "ov.h"
+
+// TODO: remove these one once octave_value supports octave_idx_type.
+static octave_value& assign (octave_value& ov, octave_idx_type idx)
+{
+ double tmp = idx;
+ ov = tmp;
+ return ov;
+}
+
+static octave_value& assign (octave_value& ov, const ArrayN& ida)
+{
+ NDArray tmp (ida.dims ());
+ for (int i = 0; i < ida.numel (); i++)
+ tmp(i) = ida(i);
+ ov = tmp;
+ return ov;
+}
+
+static bool ov_str_comp (const octave_value& a, const octave_value& b)
+{
+ return a.string_value () < b.string_value ();
+}
+
+DEFUN_DLD (lookup, args, nargout, "\
+-*- texinfo -*-\n\
address@hidden {Function File} address@hidden =} lookup (@var{table}, @var{y})\n\
+Lookup values in a sorted table. Usually used as a prelude to\n\
+interpolation.\n\
+\n\
+If table is strictly increasing and @code{idx = lookup (table, y)}, then\n\
address@hidden(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)}\n\
+within the table. If @code{y(i)} is before the table, then\n\
address@hidden(i)} is 0. If @code{y(i)} is after the table then\n\
address@hidden(i)} is @code{table(n)}.\n\
+\n\
+If the table is strictly decreasing, then the tests are reversed.\n\
+There are no guarantees for tables which are non-monotonic or are not\n\
+strictly monotonic.\n\
+\n\
+To get an index value which lies within an interval of the table,\n\
+use:\n\
+\n\
address@hidden
+idx = lookup (table(2:length(table)-1), y) + 1\n\
address@hidden example\n\
+\n\
address@hidden
+This expression puts values before the table into the first\n\
+interval, and values after the table into the last interval.\n\
+\n\
+If complex values are supplied, their magnitudes will be used.\n\
+\n\
address@hidden and @var{y} can also be a cell array of strings\n\
+(or @var{y} can be a single string). In this case, string lookup\n\
+is performed using lexicographical comparison.\n\
address@hidden deftypefn")
+{
+ int nargin = args.length ();
+ octave_value_list retval;
+
+ if (nargin != 2)
+ {
+ print_usage ();
+ return retval;
+ }
+
+ octave_value argtable = args(0), argy = args(1);
+
+ if (argtable.ndims () > 2 || (argtable.columns () > 1 && argtable.rows () > 1))
+ warning ("lookup: table is not a vector");
+
+ bool num_case = argtable.is_numeric_type () && argy.is_numeric_type ();
+ bool str_case = argtable.is_cell () && (argy.is_cell () || argy.is_string ());
+
+ if (num_case)
+ {
+ // in the case of a complex array, absolute values will be used (though it's
+ // not too meaningful).
+
+ NDArray table = (argtable.is_complex_type ())
+ ? argtable.complex_array_value ().abs ()
+ : argtable.array_value ();
+
+ NDArray y = (argy.is_complex_type ())
+ ? argy.complex_array_value ().abs ()
+ : argy.array_value ();
+
+ ArrayN idx (y.dims ());
+
+ // determine whether the array is descending.
+ bool is_descending = check_descending(table.data (), table.length ());
+
+ octave_lookup (table.data (), table.length (),
+ y.data (), y.length (), idx.fortran_vec (),
+ is_descending);
+
+ //retval(0) = idx;
+ assign (retval(0), idx);
+ }
+ else if (str_case)
+ {
+ Cell table = argtable.cell_value ();
+
+ // query just the first cell to verify it's a string
+ if (table(0).is_string ())
+ {
+ // determine whether the array is descending.
+ bool is_descending = check_descending(table.data (), table.length (),
+ ov_str_comp);
+
+ if (argy.is_cell ())
+ {
+ Cell y = argy.cell_value ();
+ ArrayN idx (y.dims ());
+
+ // strings are rarely meaningfully downsampled. Even multiple
+ // strings are probably being looked up just for convenience.
+ // In that case, the sequential lookup is too optimistic to
+ // expect neighbouring strings be often close to each other.
+ // Therefore, use a sequence of full lookups rather than the
+ // sequential version.
+
+ for (int i = 0; i < y.numel (); i++)
+ {
+ octave_lookup (table.data (), table.length (),
+ &y(i), 1, &idx(i),
+ ov_str_comp, is_descending);
+ }
+
+ //retval(0) = idx;
+ assign (retval(0), idx);
+ }
+ else
+ {
+ octave_idx_type idx;
+
+ octave_lookup (table.data (), table.length (),
+ &argy, 1, &idx,
+ ov_str_comp, is_descending);
+
+ //retval(0) = idx;
+ assign (retval(0), idx);
+ }
+ }
+ else
+ error("lookup: table is not a cell array of strings.");
+
+ }
+ else
+ print_usage ();
+
+ return retval;
+
+}
+
+/*
+%!assert (real(lookup(1:3, 0.5)), 0) # value before table
+%!assert (real(lookup(1:3, 3.5)), 3) # value after table error
+%!assert (real(lookup(1:3, 1.5)), 1) # value within table error
+%!assert (real(lookup(1:3, [3,2,1])), [3,2,1])
+%!assert (real(lookup([1:4]', [1.2, 3.5]')), [1, 3]');
+%!assert (real(lookup([1:4], [1.2, 3.5]')), [1, 3]');
+%!assert (real(lookup([1:4]', [1.2, 3.5])), [1, 3]);
+%!assert (real(lookup([1:4], [1.2, 3.5])), [1, 3]);
+%!assert (real(lookup(1:3, [3, 2, 1])), [3, 2, 1]);
+%!assert (real(lookup([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5])), [0, 1, 2, 1, 1])
+%!assert (isempty(lookup([1:3], [])))
+%!assert (isempty(lookup([1:3]', [])))
+%!assert (real(lookup(1:3, [1, 2; 3, 0.5])), [1, 2; 3, 0]);
+%!
+%!assert (real(lookup({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"})), [1,1;0,2])
+%!assert (real(lookup({"apple","lemon","orange"}, "potato")), 3)
+%!assert (real(lookup({"orange","lemon","apple"}, "potato")), 0)
+*/
diff -r 84122fb29c75 -r 7313781aec60 src/Makefile.in
--- a/src/Makefile.in Wed Mar 12 21:17:07 2008 -0400
+++ b/src/Makefile.in Thu Mar 13 14:59:28 2008 +0100
@@ -67,7 +67,7 @@ DLD_XSRC := balance.cc besselj.cc betain
dasrt.cc dassl.cc det.cc dispatch.cc dlmread.cc dmperm.cc eig.cc \
expm.cc fft.cc fft2.cc fftn.cc fftw.cc filter.cc find.cc fsolve.cc \
gammainc.cc gcd.cc getgrent.cc getpwent.cc getrusage.cc \
- givens.cc hess.cc inv.cc kron.cc lsode.cc \
+ givens.cc hess.cc inv.cc kron.cc lookup.cc lsode.cc \
lu.cc luinc.cc matrix_type.cc md5sum.cc minmax.cc pinv.cc qr.cc \
quad.cc qz.cc rand.cc regexp.cc schur.cc sparse.cc \
spparms.cc sqrtm.cc svd.cc syl.cc symrcm.cc symbfact.cc \