diff --git i/NEWS w/NEWS index f769b18..8bb5d43 100644 --- i/NEWS +++ w/NEWS @@ -1,5 +1,9 @@ * Noteworthy changes in release ?.? (????-??-??) [?] +** New Features + + datamash(1): -g/groupby takes ranges of columns (e.g. 1-4) + * Noteworthy changes in release 1.8 (2022-07-23) [stable] diff --git i/doc/datamash.texi w/doc/datamash.texi index e5693a5..e149880 100644 --- i/doc/datamash.texi +++ w/doc/datamash.texi @@ -216,8 +216,8 @@ While using this option with non-linewise operations was historically permitted, it never produced very sensible output. Such usage has been deprecated, and in a future release it will result in an error. -@item --group=@var{X[,Y,X]} -@itemx -g @var{X[,Y,X]} +@item --group=@var{X[,Y,Z]} +@itemx -g @var{X[,Y,Z]} @opindex --group @opindex -g @cindex grouping @@ -227,6 +227,7 @@ must be sorted by the same fields @var{X[,Y,Z]}. Use @option{--sort} to automatically sort the input. If @option{--group} is not specified, each operation is performed in the entire input file. +Ranges of field numbers like @var{X-Z} are also supported. 
@item --header-in @opindex --header-in diff --git i/src/op-parser.c w/src/op-parser.c index 6b464e1..cfff7c9 100644 --- i/src/op-parser.c +++ w/src/op-parser.c @@ -557,7 +557,30 @@ parse_mode_column (enum processing_mode pm) case TOK_INTEGER: if (scan_val_int>0) { - ADD_NUMERIC_GROUP (scan_val_int); + /* Hold the range bounds as uintmax_t (the type scan_val_int is compared + as) so that large field numbers are not truncated through int. */ + uintmax_t begin_range = scan_val_int, end_range; + if (scanner_peek_token () != TOK_DASH) + { + ADD_NUMERIC_GROUP (scan_val_int); + } + else + { + scanner_get_token (); + if (scanner_get_token () != TOK_INTEGER + || scan_val_int < begin_range) + { + die (EXIT_FAILURE, 0, + _("invalid field range for operation %s"), + quote (get_processing_mode_name (pm))); + } + end_range = scan_val_int; + /* Add the last field after the loop so that the counter cannot + wrap around when end_range is the largest representable value. */ + for (uintmax_t i = begin_range; i < end_range; i++) + ADD_NUMERIC_GROUP (i); + ADD_NUMERIC_GROUP (end_range); + } break; } /* fallthrough */ diff --git i/tests/datamash-crosstab.pl w/tests/datamash-crosstab.pl index e368658..25d9d2c 100755 --- i/tests/datamash-crosstab.pl +++ w/tests/datamash-crosstab.pl @@ -171,6 +171,7 @@ my @Tests = ['c1','crosstab 1,2 first 3', {IN_PIPE=>$in1}, {OUT=>$out1_first}], ['c2','ct 1,2 first 3', {IN_PIPE=>$in1}, {OUT=>$out1_first}], ['c3','ct 1,2 count 1', {IN_PIPE=>$in1}, {OUT=>$out1_count}], + ['c3a','ct 1-2 count 1', {IN_PIPE=>$in1}, {OUT=>$out1_count}], # Default operation is count ['c4','ct 1,2', {IN_PIPE=>$in1}, {OUT=>$out1_count}], @@ -214,8 +215,6 @@ my @Tests = {ERR=>"$prog: crosstab supports one operation, found 2\n"}], ['e7', 'ct 1:2', {IN_PIPE=>""}, {EXIT=>1}, {ERR=>"$prog: invalid field pair for operation 'crosstab'\n"}], - ['e8', 'ct 1-2', {IN_PIPE=>""}, {EXIT=>1}, - {ERR=>"$prog: invalid field range for operation 'crosstab'\n"}], ); my $save_temps = $ENV{SAVE_TEMPS}; diff --git i/tests/datamash-error-msgs.pl w/tests/datamash-error-msgs.pl index 34259c1..5c899ef 100644 --- i/tests/datamash-error-msgs.pl +++ w/tests/datamash-error-msgs.pl @@ -175,8 +175,6 @@ my @Tests = # Invalid field specifications for primary operations ['e90', 'groupby 1:2', {IN_PIPE=>""}, 
{EXIT=>1}, {ERR=>"$prog: invalid field pair for operation 'groupby'\n"}], - ['e91', 'groupby 1-2', {IN_PIPE=>""}, {EXIT=>1}, - {ERR=>"$prog: invalid field range for operation 'groupby'\n"}], # values for strbin operation ['e92','strbin:- 1', {IN_PIPE=>""}, {EXIT=>1}, diff --git i/tests/datamash-tests.pl w/tests/datamash-tests.pl index 687a5b1..d78fb69 100755 --- i/tests/datamash-tests.pl +++ w/tests/datamash-tests.pl @@ -323,6 +323,54 @@ my $in_hdr_only=<<'EOF'; X:Y:Z EOF +my $in_multiple_groups=<<'EOF'; +a a a foo +a a a bar +a a b baz +a b b alice +a b c bob +b b c eve +b c d frank +b c d foo +b c d bar +b d d baz +c d a alice +c d a bob +c e b eve +c e b frank +c e c foo +d a c bar +d a d baz +d a d alice +d b e bob +d b e eve +e b a frank +e c a foo +e c b bar +e c b baz +e d c alice +EOF + +my $out_multiple_groups=<<'EOF'; +a a a bar,foo +a a b baz +a b b alice +a b c bob +b b c eve +b c d bar,foo,frank +b d d baz +c d a alice,bob +c e b eve,frank +c e c foo +d a c bar +d a d alice,baz +d b e bob,eve +e b a frank +e c a foo +e c b bar,baz +e d c alice +EOF + =pod Example: my $data = "a 1\nb 2\n"; @@ -457,9 +505,13 @@ my @Tests = "line 1 has only 3 fields\n"}], ['e25', '-g 1,,2 sum 1' , {IN_PIPE=>"a\n"}, {EXIT=>1}, {ERR=>"$prog: missing field for operation 'groupby'\n"}], - ['e26', '--collapse-delimiter=foo', {IN_PIPE=>"a\n"}, {EXIT=>1}, + ['e26', '-g 4-1 sum 1', {IN_PIPE=>"a\n"}, {EXIT=>1}, + {ERR=>"$prog: invalid field range for operation 'groupby'\n"}], + ['e27', '-g 1- sum 1', {IN_PIPE=>"a\n"}, {EXIT=>1}, + {ERR=>"$prog: invalid field range for operation 'groupby'\n"}], + ['e28', '--collapse-delimiter=foo', {IN_PIPE=>"a\n"}, {EXIT=>1}, {ERR=>"$prog: the delimiter must be a single character\n"}], - ['e27', '-c foo', {IN_PIPE=>"a\n"}, {EXIT=>1}, + ['e29', '-c foo', {IN_PIPE=>"a\n"}, {EXIT=>1}, {ERR=>"$prog: the delimiter must be a single character\n"}], # No newline at the end of the lines @@ -504,6 +556,10 @@ my @Tests = # Numeric operation on an empty field 
should not work ['f23', '-t: -g1 sum 2', {IN_PIPE=>$in_empty1}, {EXIT=>1}, {ERR=>"$prog: invalid numeric value in line 1 field 2: ''\n"}], + ['f24', '-W -g1,2,3 uniq 4', + {IN_PIPE=>$in_multiple_groups}, {OUT=>$out_multiple_groups}], + ['f25', '-W -g1-3 uniq 4', + {IN_PIPE=>$in_multiple_groups}, {OUT=>$out_multiple_groups}], # whitespace only, different field delimiters ['ws1.1', 'check', {IN_PIPE=>$ws1}, {OUT=>"1 line, 4 fields\n"}], @@ -568,7 +624,8 @@ my @Tests = # Multiple keys (from different columns) ['g8.1', '-t" " -g1,3 sum 2', {IN_PIPE=>$in_g3}, {OUT=>"A W 15\nA X 24\nB Y 17\nB Z 19\nC Z 23\n"}], - + ['g8.2', '-t" " -g1-2 count 2', {IN_PIPE=>$in_g4}, + {OUT=>"A 5 1\nK 6 1\nP 2 1\n"}], # count on non-numeric fields ['cnt1', '-t" " -g 1 count 1', {IN_PIPE=>$in_g2},