gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 68f645f: Table: --metaupdate, new option to up


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 68f645f: Table: --metaupdate, new option to update column meta data
Date: Tue, 21 Jul 2020 21:48:24 -0400 (EDT)

branch: master
commit 68f645f93c59d29545620a687e40522b6ef56c77
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Table: --metaupdate, new option to update column meta data
    
    Until now, after column arithmetic or column concatenation, the new columns
    in the output table didn't have good metadata and the users were forced to
    use 'astfits' to update the column names, and units and comments. This was
    very inconvenient and was thus easily forgotten or ignored. But metadata
    are very important and without them the usability of the table will be very
    limited (even the author will forget!).
    
    With this commit, a new '--metaupdate' option has been added that will
    allow updating column metadata in the same command that generates the
    table.
---
 NEWS              |  5 ++++
 bin/table/args.h  | 15 ++++++++++++
 bin/table/main.h  |  1 +
 bin/table/table.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bin/table/ui.c    |  9 +++++++
 bin/table/ui.h    |  3 ++-
 doc/gnuastro.texi | 51 ++++++++++++++++++++++++++++++++++++++
 lib/options.c     |  9 +++----
 8 files changed, 160 insertions(+), 6 deletions(-)

diff --git a/NEWS b/NEWS
index 30ab78c..832c81a 100644
--- a/NEWS
+++ b/NEWS
@@ -39,6 +39,11 @@ See the end of the file for license conditions.
      columns will be appended with a '-N' (where 'N' is a counter for the
      file that is used to append columns). The default behavior is to avoid
      multiple columns having the same name.
+   - New '--metaupdate' option to add/update the column metadata (name,
+     units or comments) just before writing the output. This is a very
+     useful feature in combination with column arithmetic or column
+     concatenation because it will allow you to update the new column
+     metadata in the same command. See the manual for more.
 
   Library:
    - Spectral lines library: SiIII, OIII, CIV, NV and rest of Lyman series.
diff --git a/bin/table/args.h b/bin/table/args.h
index bd734c1..f273fd5 100644
--- a/bin/table/args.h
+++ b/bin/table/args.h
@@ -154,6 +154,21 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_MANDATORY,
       GAL_OPTIONS_NOT_SET
     },
+    {
+      "metaupdate",
+      UI_KEY_METAUPDATE,
+      "STR,STR[,STR,STR]",
+      0,
+      "Update output metadata (name, unit, comments).",
+      GAL_OPTIONS_GROUP_OUTPUT,
+      &p->metaupdate,
+      GAL_TYPE_STRING,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      gal_options_parse_name_and_strings
+    },
+
 
 
 
diff --git a/bin/table/main.h b/bin/table/main.h
index 9bef786..3898512 100644
--- a/bin/table/main.h
+++ b/bin/table/main.h
@@ -104,6 +104,7 @@ struct tableparams
   gal_list_str_t *catcolumnhdu;  /* HDU/extension for the catcolumn.    */
   gal_list_str_t  *catcolumns;  /* List of columns to concatenate.      */
   uint8_t    catcolumnrawname;  /* Don't modify name of appended col.   */
+  gal_data_t      *metaupdate;  /* Update column metadata.              */
 
   /* Internal. */
   struct column_pack *outcols;  /* Output column packages.              */
diff --git a/bin/table/table.c b/bin/table/table.c
index 6cc3608..53db49a 100644
--- a/bin/table/table.c
+++ b/bin/table/table.c
@@ -676,6 +676,76 @@ table_catcolumn(struct tableparams *p)
 
 
 
+void
+table_metaupdate(struct tableparams *p)
+{
+  char **strarr;
+  gal_data_t *meta, *col;
+  size_t counter, *colnum;
+
+  /* Loop through all the given updates and implement them. */
+  for(meta=p->metaupdate;meta!=NULL;meta=meta->next)
+    {
+      /* A non-zero return means the column specifier couldn't be parsed
+         as a number, so it is treated as a column name. */
+      colnum=NULL;
+      if( gal_type_from_string((void **)(&colnum), meta->name,
+                               GAL_TYPE_SIZE_T) )
+        {
+          /* Find the first column with this name. Columns without a name
+             are skipped: passing a NULL pointer to 'strcmp' is undefined. */
+          for(col=p->table; col!=NULL; col=col->next)
+            if(col->name && !strcmp(col->name, meta->name)) break;
+        }
+      /* The column specifier is a number. */
+      else
+        {
+          /* Go over the columns and find the one with this counter. */
+          counter=1;
+          for(col=p->table; col!=NULL; col=col->next)
+            if(counter++==colnum[0]) break;
+
+          /* Clean up the space that was allocated for 'colnum' (it's not
+             allocated when the given value was a string). */
+          free(colnum);
+        }
+
+      /* If a match was found, then 'col' should not be NULL. */
+      if(col==NULL)
+        error(EXIT_FAILURE, 0, "no column found for '%s' (given to "
+              "'--metaupdate'). Columns can either be specified by "
+              "their position in the output table (integer counter, "
+              "starting from 1), or their name (the first column "
+              "found with the given name will be used)", meta->name);
+
+      /* The matching column is found and we know that at least one value is
+         already given (otherwise 'gal_options_parse_name_and_values' would
+         abort the program). The first given string is the new name. */
+      strarr=meta->array;
+      if(col->name) free(col->name);
+      gal_checkset_allocate_copy(strarr[0], &col->name);
+
+      /* If more than one string is given, the second one is the new
+         unit. */
+      if(meta->size>1)
+        {
+          /* Replace the unit. */
+          if(col->unit) free(col->unit);
+          gal_checkset_allocate_copy(strarr[1], &col->unit);
+
+          /* The next element is the comment of the column. */
+          if(meta->size>2)
+            {
+              if(col->comment) free(col->comment);
+              gal_checkset_allocate_copy(strarr[2], &col->comment);
+            }
+        }
+    }
+}
+
+
+
+
 
 
 
@@ -714,6 +784,9 @@ table(struct tableparams *p)
   /* Concatenate the columns of tables (if required)*/
   if(p->catcolumnfile) table_catcolumn(p);
 
+  /* If column metadata should be updated, do it just before writing. */
+  if(p->metaupdate) table_metaupdate(p);
+
   /* Write the output. */
   gal_table_write(p->table, NULL, p->cp.tableformat, p->cp.output,
                   "TABLE", p->colinfoinstdout);
diff --git a/bin/table/ui.c b/bin/table/ui.c
index ea9034f..7c66f6b 100644
--- a/bin/table/ui.c
+++ b/bin/table/ui.c
@@ -276,6 +276,15 @@ ui_read_check_only_options(struct tableparams *p)
   if(p->head!=GAL_BLANK_SIZE_T && p->tail!=GAL_BLANK_SIZE_T)
     error(EXIT_FAILURE, 0, "'--head' and '--tail' options cannot be "
           "called together");
+
+  /* If '--metaupdate' is given, make sure none of the given options have
+     more than three values. */
+  if(p->metaupdate)
+    for(tmp=p->metaupdate;tmp!=NULL;tmp=tmp->next)
+      if(tmp->size>3)
+        error(EXIT_FAILURE, 0, "at most three values can be given to each "
+              "call of '--metaupdate' ('-m') after the original columns "
+              "name or number. But %zu strings have been given", tmp->size);
 }
 
 
diff --git a/bin/table/ui.h b/bin/table/ui.h
index 23be7a4..14f776c 100644
--- a/bin/table/ui.h
+++ b/bin/table/ui.h
@@ -41,7 +41,7 @@ enum program_args_groups
 
 /* Available letters for short options:
 
-   a b d f g j k l m p t v x y z
+   a b d f g j k l p t v x y z
    A B E G H J O Q R X Y
 */
 enum option_keys_enum
@@ -62,6 +62,7 @@ enum option_keys_enum
   UI_KEY_CATCOLUMNS      = 'C',
   UI_KEY_CATCOLUMNHDU    = 'u',
   UI_KEY_CATCOLUMNFILE   = 'L',
+  UI_KEY_METAUPDATE      = 'm',
 
   /* Only with long version (start with a value 1000, the rest will be set
      automatically). */
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 1db0a48..ae0dd67 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -9161,6 +9161,12 @@ Comparison of the two commands above clearly shows why 
it is recommended to use
 When the columns have descriptive names, the command/script actually becomes 
much more readable, describing the intent of the operation.
 It is also independent of the low-level table structure: for the second 
command, the position of the @code{AWAV} and @code{SPECTRUM} columns in 
@file{table.fits} is irrelevant.
 
+By nature, column arithmetic changes the values of the data within the column.
+So the old column meta data can't be used any more.
+By default the new column created for the arithmetic operation will be given 
generic metadata (for example its name will be @code{ARITH_1}, which is hardly 
useful!).
+But meta data are critically important and it is good practice to always have 
short, but descriptive, names for each column, units and also some comments 
for more explanation.
+To add metadata to a column, you can use the @option{--metaupdate} option that 
is described in @ref{Invoking asttable}.
+
 Finally, since the arithmetic expressions are a value to @option{--column}, it 
doesn't necessarily have to be a separate option, so the commands above are 
also identical to the command below (note that this only has one @option{-c} 
option).
 Just be very careful with the quoting!
 
@@ -9173,6 +9179,8 @@ In particular, the few that are not present in the 
Gnuastro library aren't yet s
 For a list of the Gnuastro library arithmetic operators, please see the macros 
starting with @code{GAL_ARITHMETIC_OP} and ending with the operator name in 
@ref{Arithmetic on datasets}.
 Besides the operators in @ref{Arithmetic operators}, several operators are 
only available in Table to use on table columns.
 
+
+
 @cindex WCS: World Coordinate System
 @cindex World Coordinate System (WCS)
 @table @code
@@ -9393,6 +9401,18 @@ Also note that no operation (for example row selection, 
arithmetic or etc) is ap
 If the appended columns have a name, the column names of each file will be 
appended with a @code{-N}, where @code{N} is a counter starting from 1 for each 
appended file.
 This is done because when concatenating columns from multiple tables (more 
than two) into one, they may have the same name, and its not good practice to 
have multiple columns with the same name.
 You can disable this feature with @option{--catcolumnrawname}.
+To have full control over the concatenated column names, you can use the 
@option{--metaupdate} option described below.
+
+For example, let's assume you have two catalogs of the same objects (same 
number of rows) in different filters.
+Such that @file{f160w-cat.fits} has a @code{MAGNITUDE} column that has the 
magnitude of each object in the @code{F160W} filter and similarly 
@file{f105w-cat.fits}, also has a @code{MAGNITUDE} column, but for the 
@code{F105W} filter.
+You can use column concatenation like below to import the @code{MAGNITUDE} 
column from the @code{F105W} catalog into the @code{F160W} catalog, while 
giving each magnitude column a different name:
+
+@example
+asttable f160w-cat.fits --output=both.fits \
+  --catcolumnfile=f105w-cat.fits --catcolumns=MAGNITUDE \
+  --metaupdate=MAGNITUDE,MAG-F160W,log,"Magnitude in F160W" \
+  --metaupdate=MAGNITUDE-1,MAG-F105W,log,"Magnitude in F105W"
+@end example
 
 @item -u STR/INT
 @itemx --catcolumnhdu=STR/INT
@@ -9518,6 +9538,37 @@ This behavior is taken from the @command{head} program 
in GNU Coreutils.
 Only print the given number of rows from the @emph{bottom} of the final table.
 See @option{--head} for more.
 
+@item -m STR/INT,STR[,STR[,STR]]
+@itemx --metaupdate=STR/INT,STR[,STR[,STR]]
+Update a column's metadata just before writing the final table (after all 
other operations are done, for example column arithmetic, or column 
concatenation).
+The first value (before the first comma) given to this option can either be a 
counter (positive integer, counting from 1), or a name (the column's name in 
the output if this option wasn't called).
+This option can be very useful in conjunction with column arithmetic (see 
@ref{Column arithmetic}), or column concatenation (appending multiple columns 
from different tables, for more see @option{--catcolumnfile}).
+
+After the to-be-updated column is identified, at least one other string 
should be given, with a maximum of three strings.
+The first string after the original name will be the selected column's new 
name.
+The next (optional) string will be the selected column's unit and the third 
(optional) will be its comments.
+If the two optional strings aren't given, the original column's units or comments 
will remain unchanged.
+Here are three examples:
+
+@table @option
+
+@item --metaupdate=MAGNITUDE,MAG_F160W
+This will convert the name of the original @code{MAGNITUDE} column to 
@code{MAG_F160W}, leaving the unit and comments unchanged.
+
+@item --metaupdate=3,MAG_F160W,mag
+This will convert the name of the third column of the final output to 
@code{MAG_F160W} and the units to @code{mag}, while leaving the comments 
untouched.
+
+@item --metaupdate=MAGNITUDE,MAG_F160W,mag,"Magnitude in F160W filter"
+This will convert the name of the original @code{MAGNITUDE} column to 
@code{MAG_F160W}, and the units to @code{mag} and the comments to 
@code{Magnitude in F160W filter}.
+Note the double quotations around the comment string, they are necessary to 
preserve the white-space characters within the column comment from the 
command-line, into the program (otherwise, upon reaching a white-space 
character, the shell will consider this option to be finished and cause 
un-expected behavior).
+@end table
+
+The recommended way to use this option is to first do all your operations on 
your table's data and write it into a temporary file (maybe called 
@file{temp.fits}).
+Look into that file's metadata (with @command{asttable temp.fits -i}) to see 
the exact column positions and possible names, then add the necessary calls to 
this option to your previous call to @command{asttable}, so it writes proper 
metadata in the same run (for example in a script or Makefile).
+Recall that when a name is given, this option will update the metadata of the 
first column that matches, so if you have multiple columns with the same name, 
you can call this option multiple times with the same first argument to change 
them all.
+
+Finally, if you already have a FITS table by other means (for example by 
downloading) and you merely want to update the column metadata and leave the 
data intact, it is much more efficient to directly modify the respective FITS 
header keywords with @code{astfits}, using the keyword manipulation features 
described in @ref{Keyword manipulation}.
+@option{--metaupdate} is mainly intended for scenarios where you want to edit 
the data so it will always load the full/partial dataset into memory, then 
write out the resulting datasets with updated/corrected metadata.
 @end table
 
 
diff --git a/lib/options.c b/lib/options.c
index 9cf89be..11a7f48 100644
--- a/lib/options.c
+++ b/lib/options.c
@@ -1133,11 +1133,10 @@ gal_options_read_sigma_clip(struct argp_option *option, 
char *arg,
 
 
 
-/* Parse name and (float64) values:  name,value1,value2,value3,...
+/* Parse name and (string/float64) values:  name,value1,value2,value3,...
 
    The output is a 'gal_data_t', where the 'name' is the given name and the
-   values are in its array (of 'float64' type).
- */
+   values are in its array (of 'char *' or 'float64' type). */
 static void *
 gal_options_parse_name_and_values(struct argp_option *option, char *arg,
                                   char *filename, size_t lineno, void *junk,
@@ -1223,10 +1222,10 @@ gal_options_parse_name_and_values(struct argp_option 
*option, char *arg,
           */
         }
       else
-        error(EXIT_FAILURE, 0, "'--%s' requires a string of numbers "
+        error(EXIT_FAILURE, 0, "'--%s' requires a series of %s "
               "(separated by ',' or ':') following its first argument, "
               "please run with '--help' for more information",
-              option->name);
+              option->name, str0_f641?"numbers":"strings");
 
       /* Our job is done, return NULL. */
       return NULL;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]