[gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to

gnuastro-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to

From:	Mohammad Akhlaghi
Subject:	[gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to collapse by sigma-clipping
Date:	Fri, 9 Sep 2022 23:28:39 -0400 (EDT)
branch: master
commit 3aa904eca8c1be26efbacfaa2659f38e9711e0b0
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Library (dimension.h): functions to collapse by sigma-clipping
    
    Until now, the only way you could collapse dimensions in Gnuastro was by
    mean, sum or number. However, in some scenarios, the mean alone isn't
    sufficient since a single outlier can significantly shift it.
    
    With this commit, the necessary high-level functions have been added to
    allow easy sort-based collapsing (for example with the median or
    sigma-clipping). Using these low level functions in 'dimension.h', the
    Arithmetic program now provides 5 new operators for collapsing a data set
    using such sort-based statistics.
---
 NEWS                        |  12 ++
 bin/arithmetic/arithmetic.c | 126 ++++++++++++++++---
 bin/arithmetic/arithmetic.h |   5 +
 doc/gnuastro.texi           | 187 +++++++++++++++++-----------
 lib/dimension.c             | 290 +++++++++++++++++++++++++++++++++++++++++++-
 lib/gnuastro/dimension.h    |  29 +++++
 lib/statistics.c            |   2 +-
 7 files changed, 560 insertions(+), 91 deletions(-)

diff --git a/NEWS b/NEWS
index ecd2090e..3def0eb2 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,13 @@ See the end of the file for license conditions.
     - sb-to-counts: convert surface brightness (mag/arcsec^2) to counts.
     - mag-to-sb: convert magnitudes to surface brightness over a certain area.
     - sb-to-mag: convert surface brightness to magnitudes over a certain area.
+  - New operators that are specific to Arithmetic:
+    - collapse-median: collapse input dataset by calculating the median
+      along the given dimension.
+    - collapse-sigclip-std: Collapse with sigma-clipped standard deviation.
+    - collapse-sigclip-mean: Collapse with sigma-clipped mean.
+    - collapse-sigclip-median: Collapse with sigma-clipped median.
+    - collapse-sigclip-number: Collapse with number remaining after sigma-clip.
 
   ConvertType:
   - It is now possible to draw vector graphics marks from a catalog over
@@ -124,6 +131,11 @@ See the end of the file for license conditions.
   - gal_color_id_to_name: return the name of a color from its ID.
   - gal_color_in_rgb: return the fraction of red-green-blue in a color.
   - gal_color_name_to_id: return the ID of a color from its name.
+  - gal_dimension_collapse_median: collapse input along dim. using median.
+  - gal_dimension_collapse_sigclip_mean: collapse with sig-clipped mean.
+  - gal_dimension_collapse_sigclip_std: collapse with sig-clipped STD.
+  - gal_dimension_collapse_sigclip_median: collapse with sig-clipped median.
+  - gal_dimension_collapse_sigclip_number: collapse with num. after sig-clip.
   - gal_eps_shape_id_to_name: return the name of a shape from its ID.
   - gal_eps_shape_name_to_id: return the ID of a shape from its name.
   - gal_fits_unique_keyvalues: extract all unique values to a certain
diff --git a/bin/arithmetic/arithmetic.c b/bin/arithmetic/arithmetic.c
index 3551c4bb..84f622aa 100644
--- a/bin/arithmetic/arithmetic.c
+++ b/bin/arithmetic/arithmetic.c
@@ -903,32 +903,65 @@ arithmetic_interpolate(struct arithmeticparams *p, int 
operator, char *token)
 
 
 
+static void
+arithmetic_collapse_single_value(gal_data_t *input, char *counter,
+                                 char *opstr, char *description,
+                                 int checkint)
+{
+  if( input->ndim!=1 || input->size!=1)
+    error(EXIT_FAILURE, 0, "%s popped operand of 'collapse-%s*' operators "
+          "(%s) must be a single number (single-element, "
+          "one-dimensional dataset). But it has %zu dimension(s) and %zu "
+          "element(s).", counter, opstr, description, input->ndim, 
input->size);
+  if(checkint)
+    if(input->type==GAL_TYPE_FLOAT32 || input->type==GAL_TYPE_FLOAT64)
+      error(EXIT_FAILURE, 0, "%s popped operand of 'collapse-%s*' operators "
+            "(%s) must have an integer type, but it has a floating point "
+            "type ('%s')", counter, opstr, description,
+            gal_type_name(input->type,1));
+}
+
+
+
+
+
 static void
 arithmetic_collapse(struct arithmeticparams *p, char *token, int operator)
 {
   long dim;
-  gal_data_t *collapsed=NULL;
+  float p1, p2;
+  int qmm=p->cp.quietmmap;
+  gal_data_t *dimension=NULL, *input=NULL;
+  size_t nt=p->cp.numthreads, mms=p->cp.minmapsize;
+  gal_data_t *collapsed=NULL, *param1=NULL, *param2=NULL;
+
 
   /* First popped operand is the dimension. */
-  gal_data_t *dimension = operands_pop(p, token);
+  dimension = operands_pop(p, token);
 
-  /* The second popped operand is the desired input dataset. */
-  gal_data_t *input = operands_pop(p, token);
+
+  /* If we are on any of the sigma-clipping operators, we need to pop two
+     more operands. */
+  if(    operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD
+      || operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN
+      || operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN
+      || operator==ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER )
+    {
+      param2=operands_pop(p, token); /* Termination criteria. */
+      param1=operands_pop(p, token); /* Multiple of sigma. */
+    }
 
 
-  /* Small sanity check. */
-  if( dimension->ndim!=1 || dimension->size!=1)
-    error(EXIT_FAILURE, 0, "first popped operand of 'collapse-*' operators "
-          "(dimension to collapse) must be a single number (single-element, "
-          "one-dimensional dataset). But it has %zu dimension(s) and %zu "
-          "element(s).", dimension->ndim, dimension->size);
-  if(dimension->type==GAL_TYPE_FLOAT32 || dimension->type==GAL_TYPE_FLOAT64)
-    error(EXIT_FAILURE, 0, "first popped operand of 'collapse-*' operators "
-          "(dimension to collapse) must have an integer type, but it has "
-          "a floating point type ('%s')", gal_type_name(dimension->type,1));
+  /* Final popped operand is the desired input dataset. */
+  input = operands_pop(p, token);
+
+
+  /* Sanity checks. */
+  arithmetic_collapse_single_value(dimension, "first", "",
+                                   "dimension to collapse", 1);
   dimension=gal_data_copy_to_new_type_free(dimension, GAL_TYPE_LONG);
   dim=((long *)(dimension->array))[0];
-  if(dim<0 || dim==0)
+  if(dim<=0)
     error(EXIT_FAILURE, 0, "first popped operand of 'collapse-*' operators "
           "(dimension to collapse) must be positive (larger than zero), it "
           "is %ld", dim);
@@ -936,7 +969,22 @@ arithmetic_collapse(struct arithmeticparams *p, char 
*token, int operator)
     error(EXIT_FAILURE, 0, "input dataset to '%s' has %zu dimension(s), "
           "but you have asked to collapse along dimension %ld", token,
           input->ndim, dim);
-
+  if(param1)
+    {
+      arithmetic_collapse_single_value(param1, "third", "sigclip-",
+                                       "sigma-clip's multiple of sigma", 0);
+      arithmetic_collapse_single_value(param2, "second", "sigclip-",
+                                       "sigma-clip's termination param", 0);
+      param1=gal_data_copy_to_new_type_free(param1, GAL_TYPE_FLOAT32);
+      param2=gal_data_copy_to_new_type_free(param2, GAL_TYPE_FLOAT32);
+      p1=((float *)(param1->array))[0];
+      p2=((float *)(param2->array))[0];
+      if(p1<=0 || p2<=0)
+        error(EXIT_FAILURE, 0, "third and second popped operands of "
+              "'collapse-sigclip-*' operators (sigma multiple and "
+              "termination criteria) must be positive (larger than "
+              "zero), but they are %g and %g", p1, p2);
+    }
 
   /* If a WCS structure has been read, we'll need to pass it to
      'gal_dimension_collapse', so it modifies it respectively. */
@@ -970,6 +1018,31 @@ arithmetic_collapse(struct arithmeticparams *p, char 
*token, int operator)
       collapsed=gal_dimension_collapse_minmax(input, input->ndim-dim, 1);
       break;
 
+    case ARITHMETIC_OP_COLLAPSE_MEDIAN:
+      collapsed=gal_dimension_collapse_median(input, input->ndim-dim,
+                                              nt, mms, qmm);
+      break;
+
+    case ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD:
+      collapsed=gal_dimension_collapse_sclip_std(input, input->ndim-dim,
+                                                 p1, p2, nt, mms, qmm);
+      break;
+
+    case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN:
+      collapsed=gal_dimension_collapse_sclip_mean(input, input->ndim-dim,
+                                                  p1, p2, nt, mms, qmm);
+      break;
+
+    case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN:
+      collapsed=gal_dimension_collapse_sclip_median(input, input->ndim-dim,
+                                                    p1, p2, nt, mms, qmm);
+      break;
+
+    case ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER:
+      collapsed=gal_dimension_collapse_sclip_number(input, input->ndim-dim,
+                                                    p1, p2, nt, mms, qmm);
+      break;
+
     default:
       error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at %s to fix the "
             "problem. The operator code %d is not recognized", __func__,
@@ -1270,7 +1343,17 @@ arithmetic_set_operator(char *string, size_t 
*num_operands, int *inlib)
       else if (!strcmp(string, "collapse-mean"))
         { op=ARITHMETIC_OP_COLLAPSE_MEAN;         *num_operands=0; }
       else if (!strcmp(string, "collapse-number"))
-        { op=ARITHMETIC_OP_COLLAPSE_NUMBER;       *num_operands=0; }
+        { op=ARITHMETIC_OP_COLLAPSE_MEDIAN;       *num_operands=0; }
+      else if (!strcmp(string, "collapse-median"))
+        { op=ARITHMETIC_OP_COLLAPSE_MEDIAN;       *num_operands=0; }
+      else if (!strcmp(string, "collapse-sigclip-std"))
+        { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD;  *num_operands=0; }
+      else if (!strcmp(string, "collapse-sigclip-mean"))
+        { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN; *num_operands=0; }
+      else if (!strcmp(string, "collapse-sigclip-median"))
+        { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN;*num_operands=0;}
+      else if (!strcmp(string, "collapse-sigclip-number"))
+        { op=ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER;*num_operands=0;}
       else if (!strcmp(string, "add-dimension-slow"))
         { op=ARITHMETIC_OP_ADD_DIMENSION_SLOW;    *num_operands=0; }
       else if (!strcmp(string, "add-dimension-fast"))
@@ -1361,8 +1444,8 @@ arithmetic_operator_run(struct arithmeticparams *p, int 
operator,
                                            flags, d1, d2, d3));
     }
 
-  /* No need to call the arithmetic library, call the proper
-     wrappers directly. */
+  /* No need to call the arithmetic library, call the proper wrappers
+     directly. */
   else
     {
       switch(operator)
@@ -1406,7 +1489,12 @@ arithmetic_operator_run(struct arithmeticparams *p, int 
operator,
         case ARITHMETIC_OP_COLLAPSE_MIN:
         case ARITHMETIC_OP_COLLAPSE_MAX:
         case ARITHMETIC_OP_COLLAPSE_MEAN:
+        case ARITHMETIC_OP_COLLAPSE_MEDIAN:
         case ARITHMETIC_OP_COLLAPSE_NUMBER:
+        case ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD:
+        case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN:
+        case ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN:
+        case ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER:
           arithmetic_collapse(p, operator_string, operator);
           break;
 
diff --git a/bin/arithmetic/arithmetic.h b/bin/arithmetic/arithmetic.h
index f1f68614..bcc78f2d 100644
--- a/bin/arithmetic/arithmetic.h
+++ b/bin/arithmetic/arithmetic.h
@@ -49,7 +49,12 @@ enum arithmetic_prog_operators
   ARITHMETIC_OP_COLLAPSE_MIN,
   ARITHMETIC_OP_COLLAPSE_MAX,
   ARITHMETIC_OP_COLLAPSE_MEAN,
+  ARITHMETIC_OP_COLLAPSE_MEDIAN,
   ARITHMETIC_OP_COLLAPSE_NUMBER,
+  ARITHMETIC_OP_COLLAPSE_SIGCLIP_STD,
+  ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEAN,
+  ARITHMETIC_OP_COLLAPSE_SIGCLIP_MEDIAN,
+  ARITHMETIC_OP_COLLAPSE_SIGCLIP_NUMBER,
   ARITHMETIC_OP_ADD_DIMENSION_SLOW,
   ARITHMETIC_OP_ADD_DIMENSION_FAST,
   ARITHMETIC_OP_REPEAT,
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 99882287..4e3cc1ab 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -16133,8 +16133,8 @@ Reading NaN as a floating point number in Gnuastro is 
not case-sensitive.
 @menu
 * Basic mathematical operators::  For example +, -, /, log, pow, and etc.
 * Trigonometric and hyperbolic operators::  sin, cos, atan, asinh, and etc
-* Constants::
-* Unit conversion operators::   magnitudes to counts, or parsecs to AUs, and 
etc.
+* Constants::                   Physical and Mathematical constants.
+* Unit conversion operators::   Various unit conversions necessary.
 * Statistical operators::       Statistics of a single dataset (for example 
mean).
 * Stacking operators::          Coadding or combining multiple datasets into 
one.
 * Filtering operators::         Smoothing a dataset through mixing pixel with 
neighbors.
@@ -17030,6 +17030,49 @@ Similar to @option{collapse-sum}, but the returned 
dataset will have the same nu
 @item collapse-max
 Similar to @option{collapse-sum}, but the returned dataset will have the same 
numeric type as the input and will contain the maximum value for each pixel 
along the collapsed dimension.
 
+@item collapse-median
+Similar to @option{collapse-sum}, but the returned dataset will have the same 
numeric type as the input and will contain the median value for each pixel 
along the collapsed dimension.
+
+The median involves sorting, therefore @code{collapse-median} will do each 
calculation in different CPU threads to speed up the operation.
+By default, Arithmetic will detect and use all available threads, but you can 
override this with the @option{--numthreads} (or @option{-N}) option.
+
+@item collapse-sigclip-mean
+Collapse the input dataset (fourth popped operand) along the FITS dimension 
given as the first popped operand by calculating the sigma-clipped mean.
+The sigma-clipping parameters (namely, the multiple of sigma and termination 
criteria) are read as the third and second popped operands respectively.
+For more on sigma-clipping, see @ref{Sigma clipping}.
+
+For example, with the command below, the pixels of the input 2 dimensional 
@file{image.fits} will be collapsed to a single dimension output.
+The first popped operand is @code{2}, so it will collapse all the pixels that 
are vertically on top of each other.
+Such that the output will have the same number of pixels as the horizontal 
axis of the input.
+During the collapsing, all pixels that are more than @mymath{3\sigma} (third 
popped operand) are rejected, and the clipping will continue until the standard 
deviation changes less than @mymath{0.2} between clips.
+
+@example
+$ astarithmetic image.fits 3 0.2 2 collapse-sigclip-mean \
+                --output=collapsed-vertical.fits
+@end example
+
+@cartouche
+@noindent
+@strong{Printing output of collapse in plain-text:} the default datatype of 
@code{collapse-sigclip-mean} is 32-bit floating point.
+This is sufficient for any observed astronomical data.
+However, if you request a plain-text output, or decide to print/view the 
output as plain-text on the standard output, the full set of decimals may not 
be printed in some situations.
+This can lead to apparently descrete values in the output of this operator 
when viewed in plain-text!
+The FITS format is always superior (since it stores the value in binary, 
therefore not having the problem above).
+But if you are forced to save the output in plain-text, use the @code{float64} 
operator after this to change the type to 64-bit floating point (which will 
print more decimals).
+@end cartouche
+
+@item collapse-sigclip-std
+Collapse the input dataset along the given FITS dimension by calculating the 
sigma-clipped standard deviation.
+Except for returning the standard deviation after clipping, this function is 
similar to @code{collapse-sigclip-mean}, see the description of that operator 
for more.
+
+@item collapse-sigclip-median
+Collapse the input dataset along the given FITS dimension by calculating the 
sigma-clipped median.
+Except for returning the median after clipping, this function is similar to 
@code{collapse-sigclip-mean}, see the description of that operator for more.
+
+@item collapse-sigclip-number
+Collapse the input dataset along the given FITS dimension by calculating the 
number of elements that remain after sigma-clipped.
+Except for returning the number after clipping, this function is similar to 
@code{collapse-sigclip-mean}, see the description of that operator for more.
+
 @item add-dimension-slow
 Build a higher-dimensional dataset from all the input datasets stacked after 
one another (along the slowest dimension).
 The first popped operand has to be a single number.
@@ -30332,18 +30375,13 @@ For more see @ref{Defining an ellipse and ellipsoid}.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_dimension_collapse_sum (gal_data_t @code{*in}, 
size_t @code{c_dim}, gal_data_t @code{*weight})
-Collapse the input dataset (@code{in}) along the given dimension
-(@code{c_dim}, in C definition: starting from zero, from the slowest
-dimension), by summing all elements in that direction. If
-@code{weight!=NULL}, it must be a single-dimensional array, with the same
-size as the dimension to be collapsed. The respective weight will be
-multiplied to each element during the collapse.
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by summing all elements in that direction.
+If @code{weight!=NULL}, it must be a single-dimensional array, with the same 
size as the dimension to be collapsed.
+The respective weight will be multiplied to each element during the collapse.
 
-For generality, the returned dataset will have a @code{GAL_TYPE_FLOAT64}
-type. See @ref{Copying datasets} for converting the returned dataset to a
-desired type. Also, for more on the application of this function, see the
-Arithmetic program's @option{collapse-sum} operator (which uses this
-function) in @ref{Arithmetic operators}.
+For generality, the returned dataset will have a @code{GAL_TYPE_FLOAT64} type.
+See @ref{Copying datasets} for converting the returned dataset to a desired 
type.
+Also, for more on the application of this function, see the Arithmetic 
program's @option{collapse-sum} operator (which uses this function) in 
@ref{Arithmetic operators}.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_dimension_collapse_mean (gal_data_t @code{*in}, 
size_t @code{c_dim}, gal_data_t @code{*weight})
@@ -30353,90 +30391,101 @@ over it.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_dimension_collapse_number (gal_data_t 
@code{*in}, size_t @code{c_dim})
-Collapse the input dataset (@code{in}) along the given dimension
-(@code{c_dim}, in C definition: starting from zero, from the slowest
-dimension), by counting how many non-blank elements there are along that
-dimension.
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by counting how many non-blank elements there are along that 
dimension.
 
-For generality, the returned dataset will have a @code{GAL_TYPE_INT32}
-type. See @ref{Copying datasets} for converting the returned dataset to a
-desired type. Also, for more on the application of this function, see the
-Arithmetic program's @option{collapse-number} operator (which uses this
-function) in @ref{Arithmetic operators}.
+For generality, the returned dataset will have a @code{GAL_TYPE_INT32} type.
+See @ref{Copying datasets} for converting the returned dataset to a desired 
type.
+Also, for more on the application of this function, see the Arithmetic 
program's @option{collapse-number} operator (which uses this function) in 
@ref{Arithmetic operators}.
 @end deftypefun
 
 @deftypefun {gal_data_t *} gal_dimension_collapse_minmax (gal_data_t 
@code{*in}, size_t @code{c_dim}, int @code{max1_min0})
-Collapse the input dataset (@code{in}) along the given dimension
-(@code{c_dim}, in C definition: starting from zero, from the slowest
-dimension), by using the largest/smallest non-blank value along that
-dimension. If @code{max1_min0} is non-zero, then the collapsed dataset will
-have the maximum value along the given dimension and if it is zero, the
-minimum.
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by using the largest/smallest non-blank value along that dimension.
+If @code{max1_min0} is non-zero, then the collapsed dataset will have the 
maximum value along the given dimension and if it is zero, the minimum.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_median (gal_data_t 
@code{*in}, size_t @code{c_dim}, size_t @code{numthreads}, size_t 
@code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by finding the median non-blank value along that dimension.
+Since the median involves sorting, this operator benenfits from many threads 
(which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_std (gal_data_t 
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param}, 
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by finding the standard deviation of pixels along that dimension 
after sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many 
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_mean (gal_data_t 
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param}, 
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by finding the mean of pixels along that dimension after 
sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many 
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_median (gal_data_t 
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param}, 
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by finding the median of pixels along that dimension after 
sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many 
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
+@end deftypefun
+
+@deftypefun {gal_data_t *} gal_dimension_collapse_sclip_number (gal_data_t 
@code{*in}, size_t @code{c_dim}, float @code{multip}, float @code{param}, 
size_t @code{numthreads}, size_t @code{minmapsize}, int @code{quietmmap})
+Collapse the input dataset (@code{in}) along the given dimension 
(@code{c_dim}, in C definition: starting from zero, from the slowest 
dimension), by finding the number of pixels along that dimension after 
sigma-clipping.
+Since sigma-clipping involves sorting, this operator benenfits from many 
threads (which needs to be set with @code{numthreads}).
+For more on @code{minmapsize} and @code{quietmmap} see @ref{Memory management}.
+For more on sigma clipping, see @ref{Sigma clipping}.
 @end deftypefun
 
 @deftypefun size_t gal_dimension_remove_extra (size_t @code{ndim}, size_t 
@code{*dsize}, struct wcsprm @code{*wcs})
-Remove extra dimensions (those that only have a length of 1) from the basic
-size information of a dataset. @code{ndim} is the number of dimensions and
-@code{dsize} is an array with @code{ndim} elements containing the size
-along each dimension in the C dimension order. When @code{wcs!=NULL}, the
-respective dimension will also be removed from the WCS.
+Remove extra dimensions (those that only have a length of 1) from the basic 
size information of a dataset.
+@code{ndim} is the number of dimensions and @code{dsize} is an array with 
@code{ndim} elements containing the size along each dimension in the C 
dimension order.
+When @code{wcs!=NULL}, the respective dimension will also be removed from the 
WCS.
 
-This function will return the new number of dimensions and the @code{dsize}
-elements will contain the length along each new dimension.
+This function will return the new number of dimensions and the @code{dsize} 
elements will contain the length along each new dimension.
 @end deftypefun
 
 @deffn {Function-like macro} GAL_DIMENSION_NEIGHBOR_OP (@code{index}, 
@code{ndim}, @code{dsize}, @code{connectivity}, @code{dinc}, @code{operation})
-Parse the neighbors of the element located at @code{index} and do the
-requested operation on them. This is defined as a macro to allow easy
-definition of any operation on the neighbors of a given element without
-having to use loops within your source code (the loops are implemented by
-this macro). For an example of using this function, please see @ref{Library
-demo - inspecting neighbors}. The input arguments to this function-like
-macro are described below:
+Parse the neighbors of the element located at @code{index} and do the 
requested operation on them.
+This is defined as a macro to allow easy definition of any operation on the 
neighbors of a given element without having to use loops within your source 
code (the loops are implemented by this macro).
+For an example of using this function, please see @ref{Library demo - 
inspecting neighbors}.
+The input arguments to this function-like macro are described below:
 
 @table @code
 @item index
-Distance of this element from the first element in the array on a
-contiguous patch of memory (starting from 0), see the discussion above.
+Distance of this element from the first element in the array on a contiguous 
patch of memory (starting from 0), see the discussion above.
 @item ndim
 The number of dimensions associated with the contiguous patch of memory.
 @item dsize
-The full array size along each dimension. This must be an array and is
-assumed to have the same number elements as @code{ndim}. See the discussion
-under the same element in @ref{Generic data container}.
+The full array size along each dimension.
+This must be an array and is assumed to have the same number elements as 
@code{ndim}.
+See the discussion under the same element in @ref{Generic data container}.
 @item connectivity
-Most distant neighbors to consider. Depending on the number of dimensions,
-different neighbors may be defined for each element. This function-like
-macro distinguish between these different neighbors with this argument. It
-has a value between @code{1} (one) and @code{ndim}. For example in a 2D
-dataset, 4-connected neighbors have a connectivity of @code{1} and
-8-connected neighbors have a connectivity of @code{2}. Note that this is
-inclusive, so in this example, a connectivity of @code{2} will also include
-connectivity @code{1} neighbors.
+Most distant neighbors to consider.
+Depending on the number of dimensions, different neighbors may be defined for 
each element.
+This function-like macro distinguish between these different neighbors with 
this argument.
+It has a value between @code{1} (one) and @code{ndim}.
+For example in a 2D dataset, 4-connected neighbors have a connectivity of 
@code{1} and 8-connected neighbors have a connectivity of @code{2}.
+Note that this is inclusive, so in this example, a connectivity of @code{2} 
will also include connectivity @code{1} neighbors.
 @item dinc
-An array keeping the length necessary to increment along each
-dimension. You can make this array with the following function. Just do not
-forget to free the array after you are done with it:
+An array keeping the length necessary to increment along each dimension.
+You can make this array with the following function.
+Just do not forget to free the array after you are done with it:
 
 @example
 size_t *dinc=gal_dimension_increment(ndim, dsize);
 @end example
 
-@code{dinc} depends on @code{ndim} and @code{dsize}, but it must be defined
-outside this function-like macro since it involves allocation to help in
-performance.
+@code{dinc} depends on @code{ndim} and @code{dsize}, but it must be defined 
outside this function-like macro since it involves allocation to help in 
performance.
 
 @item operation
-Any C operation that you would like to do on the neighbor. This macro will
-provide you a @code{nind} variable that can be used as the index of the
-neighbor that is currently being studied. It is defined as `@code{size_t
-ndim;}'. Note that @code{operation} will be repeated the number of times
-there is a neighbor for this element.
+Any C operation that you would like to do on the neighbor.
+This macro will provide you a @code{nind} variable that can be used as the 
index of the neighbor that is currently being studied.
+It is defined as `@code{size_t ndim;}'.
+Note that @code{operation} will be repeated the number of times there is a 
neighbor for this element.
 @end table
 
-This macro works fully within its own @code{@{@}} block and except for the
-@code{nind} variable that shows the neighbor's index, all the variables
-within this macro's block start with @code{gdn_}.
+This macro works fully within its own @code{@{@}} block and except for the 
@code{nind} variable that shows the neighbor's index, all the variables within 
this macro's block start with @code{gdn_}.
 @end deffn
 
 @node Linked lists, Array input output, Dimensions, Gnuastro library
diff --git a/lib/dimension.c b/lib/dimension.c
index 9bcd95ca..b5194c2a 100644
--- a/lib/dimension.c
+++ b/lib/dimension.c
@@ -26,11 +26,14 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <stdio.h>
 #include <errno.h>
 #include <error.h>
+#include <string.h>
 #include <stdlib.h>
 
 #include <gnuastro/wcs.h>
 #include <gnuastro/pointer.h>
+#include <gnuastro/threads.h>
 #include <gnuastro/dimension.h>
+#include <gnuastro/statistics.h>
 
 
 
@@ -367,7 +370,12 @@ enum dimension_collapse_operation
  DIMENSION_COLLAPSE_MAX,
  DIMENSION_COLLAPSE_MIN,
  DIMENSION_COLLAPSE_MEAN,
+ DIMENSION_COLLAPSE_MEDIAN,
  DIMENSION_COLLAPSE_NUMBER,
+ DIMENSION_COLLAPSE_SIGCLIP_STD,
+ DIMENSION_COLLAPSE_SIGCLIP_MEAN,
+ DIMENSION_COLLAPSE_SIGCLIP_MEDIAN,
+ DIMENSION_COLLAPSE_SIGCLIP_NUMBER,
 };
 
 
@@ -840,8 +848,8 @@ gal_dimension_collapse_minmax(gal_data_t *in, size_t c_dim, 
int max1_min0)
     }
 
   /* Remove the respective dimension in the WCS structure also (if any
-     exists). Note that 'sum->ndim' has already been changed. So we'll use
-     'in->wcs'. */
+     exists). Note that 'minmax->ndim' has already been changed. So we'll
+     use 'in->wcs'. */
   gal_wcs_remove_dimension(minmax->wcs, in->ndim-c_dim);
 
   /* Clean up and return. */
@@ -854,6 +862,284 @@ gal_dimension_collapse_minmax(gal_data_t *in, size_t 
c_dim, int max1_min0)
 
 
 
+/* This structure can keep all information you want to pass onto the worker
+   function on each thread. */
+struct dimension_sortbased_p
+{
+  gal_data_t      *in;    /* Dataset to print values of.       */
+  gal_data_t     *out;    /* Dataset to print values of.       */
+  size_t        c_dim;    /* Dimension to collapse over.       */
+  int        operator;    /* Operator to use.                  */
+  float   sclipmultip;    /* Sigma clip multiple.              */
+  float    sclipparam;    /* Sigma clip parameter.             */
+  size_t   minmapsize;    /* Minimum size for memorymapping.   */
+  int       quietmmap;    /* If memorymapping should be quiet. */
+};
+
+
+
+
+
+static void *
+dimension_collapse_sortbased_worker(void *in_prm)
+{
+  /* Low-level definitions to be done first. */
+  struct gal_threads_params *tprm=(struct gal_threads_params *)in_prm;
+  struct dimension_sortbased_p *p=(struct dimension_sortbased_p *)tprm->params;
+
+  /* Input dataset (also used in other variable definitions). */
+  gal_data_t *in=p->in;
+
+  /* Subsequent definitions. */
+  gal_data_t *work, *stat;
+  size_t sind, a=in->dsize[0], b=in->dsize[1];
+  size_t i, j, index, c_dim=p->c_dim, wdsize=in->dsize[c_dim];
+
+  /* Allocate the dataset that will be sorted. */
+  work=gal_data_alloc(NULL, in->type, 1, &wdsize, NULL, 0,
+                      p->minmapsize, p->quietmmap, NULL, NULL, NULL);
+
+  /* Go over all the actions (pixels in this case) that were assigned to
+     this thread. */
+  for(i=0; tprm->indexs[i] != GAL_BLANK_SIZE_T; ++i)
+    {
+      /* For easy reading. */
+      index = tprm->indexs[i];
+
+      /* Reset the sizes (which may have been changed during the
+         statistical calculation), and flags (so the possible existance or
+         non-existance of blank values in one run doesn't affect the
+         next). */
+      work->flag=0;
+      work->size=work->dsize[0]=wdsize;
+
+      /* Extract the necessary components into an array. */
+      switch(in->ndim)
+        {
+        case 1:
+        case 3:
+          error(EXIT_FAILURE, 0, "%s: %zu dimensions not yet supported, "
+                "please get in touch with us at '%s' to implement it",
+                __func__, in->ndim, PACKAGE_BUGREPORT);
+          break;
+        case 2:
+          if(c_dim) /* The dim. to collapse is already contiguous. */
+            memcpy(work->array,
+                   gal_pointer_increment(in->array,   index*b, in->type),
+                   b*gal_type_sizeof(in->type));
+          else
+            {
+              for(j=0;j<a;++j)
+                memcpy(gal_pointer_increment(work->array, j,         in->type),
+                       gal_pointer_increment(in->array,   j*b+index, in->type),
+                       gal_type_sizeof(in->type));
+            }
+          break;
+        default:
+          error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' "
+                "to find the cause. This function doesn't support %zu "
+                "dimensions", __func__, PACKAGE_BUGREPORT, in->ndim);
+        }
+
+      /* For a check.
+      {
+        float *f=work->array;
+        for(j=0;j<wdsize;++j)
+          printf("%zu: %f\n", j, f[j]);
+      }
+      */
+
+      /* Do the necessary satistical operation. */
+      switch(p->operator)
+        {
+        case DIMENSION_COLLAPSE_MEDIAN:
+          sind=0;
+          stat=gal_statistics_median(work, 1);
+          break;
+        case DIMENSION_COLLAPSE_SIGCLIP_STD:
+        case DIMENSION_COLLAPSE_SIGCLIP_MEAN:
+        case DIMENSION_COLLAPSE_SIGCLIP_MEDIAN:
+        case DIMENSION_COLLAPSE_SIGCLIP_NUMBER:
+          stat=gal_statistics_sigma_clip(work, p->sclipmultip,
+                                         p->sclipparam, 1, 1);
+          switch(p->operator)
+            {
+            case DIMENSION_COLLAPSE_SIGCLIP_STD:     sind=3; break;
+            case DIMENSION_COLLAPSE_SIGCLIP_MEAN:    sind=2; break;
+            case DIMENSION_COLLAPSE_SIGCLIP_MEDIAN:
+              stat=gal_data_copy_to_new_type_free(stat, in->type);
+              sind=1; break;
+            case DIMENSION_COLLAPSE_SIGCLIP_NUMBER:
+              stat=gal_data_copy_to_new_type_free(stat, GAL_TYPE_UINT32);
+              sind=0; break;
+            }
+          break;
+        default:
+          error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' "
+                "to fix the problem. The operator code %d isn't "
+                "recognized", __func__, PACKAGE_BUGREPORT, p->operator);
+        }
+
+      /* Copy the result from the statistics output into the output array
+         on the desired index, then free the 'stat' array. */
+      memcpy(gal_pointer_increment(p->out->array, index, p->out->type),
+             gal_pointer_increment(stat->array,   sind,  stat->type),
+             gal_type_sizeof(stat->type));
+      gal_data_free(stat);
+    }
+
+  /* Clean up. */
+  gal_data_free(work);
+
+  /* Wait for all the other threads to finish, then return. */
+  if(tprm->b) pthread_barrier_wait(tprm->b);
+  return NULL;
+}
+
+
+
+
+
+static gal_data_t *
+dimension_collapse_sortbased(gal_data_t *in, size_t c_dim, int operator,
+                             float sclipmultip, float sclipparam,
+                             size_t numthreads, size_t minmapsize,
+                             int quietmmap)
+{
+  size_t cnum=0;
+  gal_data_t *out;
+  double *warr=NULL;
+  int otype=GAL_TYPE_INVALID;
+  size_t outdsize[10], outndim;
+  struct dimension_sortbased_p p;
+
+  /* During the median calculation, blank elements will automatically be
+     removed. */
+  int hasblank=0;
+
+  /* Basic sanity checks. */
+  if( dimension_collapse_sanity_check(in, NULL, c_dim, hasblank,
+                                      &cnum, &warr)!=NULL )
+    error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' to fix "
+          "the problem. This functions should always return NULL here",
+          __func__, PACKAGE_BUGREPORT);
+
+  /* Set the size of the collapsed output. */
+  dimension_collapse_sizes(in, c_dim, &outndim, outdsize);
+
+  /* The output array (and its type). */
+  switch(operator)
+    {
+    case DIMENSION_COLLAPSE_MEDIAN:         otype=in->type;         break;
+    case DIMENSION_COLLAPSE_SIGCLIP_STD:    otype=GAL_TYPE_FLOAT32; break;
+    case DIMENSION_COLLAPSE_SIGCLIP_MEAN:   otype=GAL_TYPE_FLOAT32; break;
+    case DIMENSION_COLLAPSE_SIGCLIP_MEDIAN: otype=in->type;         break;
+    case DIMENSION_COLLAPSE_SIGCLIP_NUMBER: otype=GAL_TYPE_UINT32;  break;
+    default:
+      error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' "
+            "to fix the problem. The operator code %d is not a "
+            "recognized operator ID", __func__, PACKAGE_BUGREPORT,
+            operator);
+    }
+  out=gal_data_alloc(NULL, otype, outndim, outdsize, in->wcs,
+                     1, in->minmapsize, in->quietmmap, NULL, NULL, NULL);
+
+  /* Spin-off the threads and do the processing on each thread. */
+  p.in=in;
+  p.out=out;
+  p.c_dim=c_dim;
+  p.operator=operator;
+  p.quietmmap=quietmmap;
+  p.minmapsize=minmapsize;
+  p.sclipparam=sclipparam;
+  p.sclipmultip=sclipmultip;
+  gal_threads_spin_off(dimension_collapse_sortbased_worker, &p,
+                       out->size, numthreads,
+                       minmapsize, quietmmap);
+
+  /* Remove the respective dimension in the WCS structure also (if any
+     exists). Note that 'out->ndim' has already been changed. So we'll use
+     'in->wcs'. */
+  gal_wcs_remove_dimension(out->wcs, in->ndim-c_dim);
+
+  /* Return the collapsed dataset. */
+  return out;
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_median(gal_data_t *in, size_t c_dim,
+                              size_t numthreads, size_t minmapsize,
+                              int quietmmap)
+{
+  int op=DIMENSION_COLLAPSE_MEDIAN;
+  return dimension_collapse_sortbased(in, c_dim, op, NAN, NAN,
+                                      numthreads, minmapsize,
+                                      quietmmap);
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_sclip_std(gal_data_t *in, size_t c_dim,
+                                 float multip, float param,
+                                 size_t numthreads, size_t minmapsize,
+                                 int quietmmap)
+{
+  int op=DIMENSION_COLLAPSE_SIGCLIP_STD;
+  return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+                                      numthreads, minmapsize, quietmmap);
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_sclip_mean(gal_data_t *in, size_t c_dim,
+                                  float multip, float param,
+                                  size_t numthreads, size_t minmapsize,
+                                  int quietmmap)
+{
+  int op=DIMENSION_COLLAPSE_SIGCLIP_MEAN;
+  return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+                                      numthreads, minmapsize, quietmmap);
+}
+
+
+
+
+
+gal_data_t *
+gal_dimension_collapse_sclip_median(gal_data_t *in, size_t c_dim,
+                                    float multip, float param,
+                                    size_t numthreads, size_t minmapsize,
+                                    int quietmmap)
+{
+  int op=DIMENSION_COLLAPSE_SIGCLIP_MEDIAN;
+  return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+                                      numthreads, minmapsize, quietmmap);
+}
+
+gal_data_t *
+gal_dimension_collapse_sclip_number(gal_data_t *in, size_t c_dim,
+                                    float multip, float param,
+                                    size_t numthreads, size_t minmapsize,
+                                    int quietmmap)
+{
+  int op=DIMENSION_COLLAPSE_SIGCLIP_NUMBER;
+  return dimension_collapse_sortbased(in, c_dim, op, multip, param,
+                                      numthreads, minmapsize, quietmmap);
+}
+
+
+
+
 
 
 
diff --git a/lib/gnuastro/dimension.h b/lib/gnuastro/dimension.h
index 516b581f..b575b039 100644
--- a/lib/gnuastro/dimension.h
+++ b/lib/gnuastro/dimension.h
@@ -117,6 +117,35 @@ gal_dimension_collapse_number(gal_data_t *in, size_t 
c_dim);
 gal_data_t *
 gal_dimension_collapse_minmax(gal_data_t *in, size_t c_dim, int max1_min0);
 
+gal_data_t *
+gal_dimension_collapse_median(gal_data_t *in, size_t c_dim,
+                              size_t numthreads, size_t minmapsize,
+                              int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_std(gal_data_t *in, size_t c_dim,
+                                 float multip, float param,
+                                 size_t numthreads, size_t minmapsize,
+                                 int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_mean(gal_data_t *in, size_t c_dim,
+                                  float multip, float param,
+                                  size_t numthreads, size_t minmapsize,
+                                  int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_median(gal_data_t *in, size_t c_dim,
+                                    float multip, float param,
+                                    size_t numthreads, size_t minmapsize,
+                                    int quietmmap);
+
+gal_data_t *
+gal_dimension_collapse_sclip_number(gal_data_t *in, size_t c_dim,
+                                    float multip, float param,
+                                    size_t numthreads, size_t minmapsize,
+                                    int quietmmap);
+
 
 
 /************************************************************************/
diff --git a/lib/statistics.c b/lib/statistics.c
index 910040de..db4372f8 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -2260,7 +2260,6 @@ gal_statistics_sigma_clip(gal_data_t *input, float 
multip, float param,
   gal_data_t *nbs=gal_statistics_no_blank_sorted(input, inplace);
   size_t maxnum = param>=1.0f ? param : GAL_STATISTICS_SIG_CLIP_MAX_CONVERGE;
 
-
   /* Some sanity checks. */
   if( multip<=0 )
     error(EXIT_FAILURE, 0, "%s: 'multip', must be greater than zero. The "
@@ -2326,6 +2325,7 @@ gal_statistics_sigma_clip(gal_data_t *input, float 
multip, float param,
 
     /* More than one element. */
     default:
+
       /* Print the comments if requested. */
       if(!quiet)
         printf("%-8s %-10s %-15s %-15s %-15s\n",
[Prev in Thread]
Current Thread
[Next in Thread]
[gnuastro-commits] master 3aa904ec: Library (dimension.h): functions to collapse by sigma-clipping, Mohammad Akhlaghi <=
Prev by Date: [gnuastro-commits] master 1ca16efa: Webpage: added links to Gnuastro's two Matrix-protocol Chat rooms
Next by Date: [gnuastro-commits] master 1c84ad58: Library (dimension.h): collapsing by sort-based also on 1D and 3D
Previous by thread: [gnuastro-commits] master 1ca16efa: Webpage: added links to Gnuastro's two Matrix-protocol Chat rooms
Next by thread: [gnuastro-commits] master 1c84ad58: Library (dimension.h): collapsing by sort-based also on 1D and 3D
Index(es):
- Date
- Thread