# HG changeset patch # User Jaroslav Hajek
# Date 1214424667 -7200 # Node ID ed779eaefdd3554108459ca0724825ba02243f5e # Parent f1a1f6dd7fac4a6d98932571657d19bbd7b3dc0a improve set functions for Matlab compatibility diff --git a/scripts/ChangeLog b/scripts/ChangeLog --- a/scripts/ChangeLog +++ b/scripts/ChangeLog @@ -1,3 +1,12 @@ +2008-06-25 Jaroslav Hajek + + * set/unique.m: Implement 'first' and 'last', some simplifications. + * set/union.m: Implement output indices. + * set/intersect.m: Implement 'rows'. + * set/setdiff.m: Implement output indices. + * set/setxor.m: Implement 'rows' and output indices. + Add tests and adjust docs in all of the above. + 2008-06-11 John W. Eaton * set/ismember.m: Fix fail tests. diff --git a/scripts/set/intersect.m b/scripts/set/intersect.m --- a/scripts/set/intersect.m +++ b/scripts/set/intersect.m @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -30,33 +31,50 @@ ## @end deftypefn ## @seealso{unique, union, setxor, setdiff, ismember} -function [c, ia, ib] = intersect (a, b) - if (nargin != 2) +function [c, ia, ib] = intersect (a, b, varargin) + + if (nargin < 2 || nargin > 3) print_usage (); endif + + if (nargin == 3 && ! 
strcmpi (varargin{1}, "rows")) + error ("intersect: if a third input argument is present, it must be the string 'rows'"); + endif + if (isempty (a) || isempty (b)) c = ia = ib = []; else ## form a and b into sets - [a, ja] = unique (a); - [b, jb] = unique (b); - - c = [a(:); b(:)]; - [c, ic] = sort (c); ## [a(:);b(:)](ic) == c - - if (iscellstr (c)) - ii = find (strcmp (c(1:end-1), c(2:end))); - else - ii = find (c(1:end-1) == c(2:end)); + if (nargout > 1) + [a, ja] = unique (a, varargin{:}); + [b, jb] = unique (b, varargin{:}); endif - c = c(ii); ## The answer - ia = ja(ic(ii)); ## a(ia) == c - ib = jb(ic(ii+1) - length (a)); ## b(ib) == c + if (nargin > 2) + c = [a; b]; + [c, ic] = sortrows (c); + ii = find (all (c(1:end-1,:) == c(2:end,:), 2)); + c = c(ii,:); + else + c = [a(:); b(:)]; + [c, ic] = sort (c); ## [a(:);b(:)](ic) == c + if (iscellstr (c)) + ii = find (strcmp (c(1:end-1), c(2:end))); + else + ii = find (c(1:end-1) == c(2:end)); + endif + c = c(ii); + endif - if (size (b, 1) == 1 || size (a, 1) == 1) + if (nargout > 1) + ia = ja(ic(ii)); ## a(ia) == c + ib = jb(ic(ii+1) - length (a)); ## b(ib) == c + endif + + + if (nargin == 2 && (size (b, 1) == 1 || size (a, 1) == 1)) c = c.'; endif endif @@ -74,3 +92,12 @@ %! assert(ib,[5 1 2 6]); %! assert(a(ia),c); %! assert(b(ib),c); +%!test +%! a = [1,1,2;1,4,5;2,1,7]; +%! b = [1,4,5;2,3,4;1,1,2;9,8,7]; +%! [c,ia,ib] = intersect(a,b,'rows'); +%! assert(c,[1,1,2;1,4,5]); +%! assert(ia,[1;2]); +%! assert(ib,[3;1]); +%! assert(a(ia,:),c); +%! assert(b(ib,:),c); diff --git a/scripts/set/setdiff.m b/scripts/set/setdiff.m --- a/scripts/set/setdiff.m +++ b/scripts/set/setdiff.m @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2005, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. 
## @@ -19,19 +20,22 @@ ## -*- texinfo -*- ## @deftypefn {Function File} {} setdiff (@var{a}, @var{b}) ## @deftypefnx {Function File} {} setdiff (@var{a}, @var{b}, "rows") +## @deftypefnx {Function File} {[@var{c}, @var{i}] = } setdiff (@var{a}, @var{b}) ## Return the elements in @var{a} that are not in @var{b}, sorted in ## ascending order. If @var{a} and @var{b} are both column vectors ## return a column vector, otherwise return a row vector. ## ## Given the optional third argument @samp{"rows"}, return the rows in ## @var{a} that are not in @var{b}, sorted in ascending order by rows. +## +## If requested, return @var{i} such that @code{c = a(i)}. ## @seealso{unique, union, intersect, setxor, ismember} ## @end deftypefn ## Author: Paul Kienzle ## Adapted-by: jwe -function c = setdiff (a, b, byrows_arg) +function [c, i] = setdiff (a, b, byrows_arg) if (nargin < 2 || nargin > 3) print_usage (); @@ -50,7 +54,11 @@ endif if (byrows) - c = unique (a, "rows"); + if (nargout > 1) + [c, i] = unique (a, "rows"); + else + c = unique (a, "rows"); + endif if (! isempty (c) && ! isempty (b)) ## Form a and b into combined set. b = unique (b, "rows"); @@ -58,9 +66,16 @@ ## Eliminate those elements of a that are the same as in b. dups = find (all (dummy(1:end-1,:) == dummy(2:end,:), 2)); c(idx(dups),:) = []; + if (nargout > 1) + i(idx(dups),:) = []; + endif endif else - c = unique (a); + if (nargout > 1) + [c, i] = unique (a); + else + c = unique (a); + endif if (! isempty (c) && ! isempty (b)) ## Form a and b into combined set. b = unique (b); @@ -72,6 +87,9 @@ dups = find (dummy(1:end-1) == dummy(2:end)); endif c(idx(dups)) = []; + if (nargout > 1) + i(idx(dups)) = []; + endif ## Reshape if necessary. 
if (size (c, 1) != 1 && size (b, 1) == 1) c = c.'; @@ -88,3 +106,9 @@ %!assert(setdiff([1; 2; 3; 4], [1; 2; 4], "rows"), 3) %!assert(setdiff([1, 2; 3, 4], [1, 2; 3, 6], "rows"), [3, 4]) %!assert(setdiff({"one","two";"three","four"},{"one","two";"three","six"}), {"four"}) + +%!test +%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! [y, i] = setdiff (a, b.'); +%! assert(y, [5]); +%! assert(y, a(i)); diff --git a/scripts/set/setxor.m b/scripts/set/setxor.m --- a/scripts/set/setxor.m +++ b/scripts/set/setxor.m @@ -1,4 +1,5 @@ ## Copyright (C) 2000, 2006, 2007 Paul Kienzle +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -18,22 +19,38 @@ ## -*- texinfo -*- ## @deftypefn {Function File} {} setxor (@var{a}, @var{b}) +## @deftypefnx {Function File} {} setxor (@var{a}, @var{b}, 'rows') ## ## Return the elements exclusive to @var{a} or @var{b}, sorted in ascending ## order. If @var{a} and @var{b} are both column vectors return a column ## vector, otherwise return a row vector. ## +## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} setxor (@var{a}, @var{b}) +## +## Return index vectors @var{ia} and @var{ib} such that @code{a==c(ia)} and +## @code{b==c(ib)}. +## ## @seealso{unique, union, intersect, setdiff, ismember} ## @end deftypefn -function c = setxor (a, b) - if (nargin != 2) +function [c, ia, ib] = setxor (a, b, varargin) + + if (nargin < 2 || nargin > 3) print_usage (); endif + if (nargin == 3 && ! strcmpi (varargin{1}, "rows")) + error ("setxor: if a third input argument is present, it must be the string 'rows'"); + endif + ## Form A and B into sets. - a = unique (a); - b = unique (b); + if (nargout > 1) + [a, ia] = unique (a, varargin{:}); + [b, ib] = unique (b, varargin{:}); + else + a = unique (a, varargin{:}); + b = unique (b, varargin{:}); + endif if (isempty (a)) c = b; @@ -41,16 +58,39 @@ c = a; else ## Reject duplicates. - c = sort ([a(:); b(:)]); - n = length (c); - idx = find (c(1:n-1) == c(2:n)); - if (! 
isempty (idx)) - c([idx, idx+1]) = []; - endif - if (size (a, 1) == 1 || size (b, 1) == 1) - c = c.'; + if (nargin > 2) + na = rows (a); nb = rows (b); + [c, i] = sortrows ([a; b]); + n = rows (c); + idx = find (all (c(1:n-1) == c(2:n), 2)); + if (! isempty (idx)) + c([idx, idx+1],:) = []; + i([idx, idx+1],:) = []; + endif + else + na = numel (a); nb = numel (b); + [c, i] = sort ([a(:); b(:)]); + n = length (c); + idx = find (c(1:n-1) == c(2:n)); + if (! isempty (idx)) + c([idx, idx+1]) = []; + i([idx, idx+1]) = []; + endif + if (size (a, 1) == 1 || size (b, 1) == 1) + c = c.'; + endif endif endif + if (nargout > 1) + ia = ia(i(i <= na)); + ib = ib(i(i > na) - na); + endif + endfunction %!assert(setxor([1,2,3],[2,3,4]),[1,4]) +%!test +%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4]; +%! [y, ia, ib] = setxor (a, b.'); +%! assert(y, [2, 5]); +%! assert(y, sort([a(ia), b(ib)])); diff --git a/scripts/set/union.m b/scripts/set/union.m --- a/scripts/set/union.m +++ b/scripts/set/union.m @@ -1,5 +1,6 @@ ## Copyright (C) 1994, 1996, 1997, 1999, 2000, 2003, 2004, 2005, 2006, ## 2007, 2008 John W. Eaton +## Copyright (C) 2008 Jaroslav Hajek ## ## This file is part of Octave. ## @@ -18,10 +19,10 @@ ##