From e5bfd1d64ed1ceb1ec6423e8c89aa936e24109e5 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sat, 27 Jun 2015 08:44:31 -0700 Subject: [PATCH] accept new option: --files0-from=F * src/grep.c: Include <assert.h>, "argv-iter.h" and "quotearg.h". (usage): Describe the new option, and adjust the `Usage': with this option, no FILE may be specified on the command line. (wrap_state): New file-scoped global. (wrapped_argv_iter_n_args, wrapped_argv_iter): New functions. (main): Handle the new option. * bootstrap.conf (gnulib_modules): Add argv-iter. * configure.ac: Set the AM_CONDITIONAL variable, HAVE_PERL. * tests/files0-from.pl: New file. * tests/Makefile.am (TESTS): Add it. * NEWS (New features): Mention it. --- NEWS | 5 ++ bootstrap.conf | 1 + src/grep.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++----- src/system.h | 7 ++ tests/Makefile.am | 1 + tests/files0-from.pl | 100 +++++++++++++++++++++++++ 6 files changed, 301 insertions(+), 18 deletions(-) create mode 100755 tests/files0-from.pl diff --git a/NEWS b/NEWS index 35c4aad..393ebf4 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,11 @@ GNU grep NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** New features + + grep accepts a new option --files0-from=FILE, where FILE contains a + list of NUL-terminated file names. 
+ ** Improvements When building grep, 'configure' now uses PCRE's pkg-config module for diff --git a/bootstrap.conf b/bootstrap.conf index 7842928..5e96142 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -25,6 +25,7 @@ gnulib_modules=' alloca announce-gen argmatch +argv-iter binary-io btowc c-ctype diff --git a/src/grep.c b/src/grep.c index a735ea5..6cd9f6a 100644 --- a/src/grep.c +++ b/src/grep.c @@ -26,9 +26,11 @@ #include #include #include +#include <assert.h> #include "system.h" #include "argmatch.h" +#include "argv-iter.h" #include "c-ctype.h" #include "closeout.h" #include "colorize.h" @@ -43,6 +45,7 @@ #include "progname.h" #include "propername.h" #include "quote.h" +#include "quotearg.h" #include "safe-read.h" #include "search.h" #include "version-etc.h" @@ -310,6 +313,7 @@ enum EXCLUDE_DIRECTORY_OPTION, EXCLUDE_OPTION, EXCLUDE_FROM_OPTION, + FILES0_FROM_OPTION, GROUP_SEPARATOR_OPTION, INCLUDE_OPTION, LINE_BUFFERED_OPTION, @@ -340,6 +344,7 @@ static struct option const long_options[] = {"file", required_argument, NULL, 'f'}, {"files-with-matches", no_argument, NULL, 'l'}, {"files-without-match", no_argument, NULL, 'L'}, + {"files0-from", required_argument, NULL, FILES0_FROM_OPTION}, {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION}, {"help", no_argument, &show_help, 1}, {"include", required_argument, NULL, INCLUDE_OPTION}, @@ -1753,8 +1758,11 @@ usage (int status) { if (status != 0) { - fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), - program_name); + fprintf (stderr, _("\ +Usage: %s [OPTION]... PATTERN [FILE]...\n\ + or: %s [OPTION]... 
--files0-from=F PATTERN\n\ +"), + program_name, program_name); fprintf (stderr, _("Try '%s --help' for more information.\n"), program_name); } @@ -1783,6 +1791,9 @@ Regexp selection and interpretation:\n"), program_name); printf (_("\ \n\ Miscellaneous:\n\ + --files0-from=F read input from the files specified by\n\ + NUL-terminated names in file F;\n\ + If F is - then read names from standard input\n\ -s, --no-messages suppress error messages\n\ -v, --invert-match select non-matching lines\n\ -V, --version display version information and exit\n\ @@ -2159,6 +2170,62 @@ fgrep_to_grep_pattern (size_t len, char const *keys, *new_len = p - *new_keys; } +/* This global and the following two wrapper functions are solely + to support the case in which we are reading file names from F, given + --files0-from=F. We need to know, when processing the first file, + if there is at least one more file name in F (to decide whether to + print the "FILENAME: " prefix), yet the argv-iter module provides no + mechanism to peek ahead into that stream. That is the reason for the + wrapped_argv_iter function. The other function is required because we + print the index (analogous to line number) when diagnosing an empty file + name, and right after we've peeked into the stream, the value of that + function would be one too large. It compensates in that sole case. */ +static unsigned int wrap_state = 0; + +/* See above. */ +static size_t +wrapped_argv_iter_n_args (struct argv_iterator const *ai) +{ + size_t n = argv_iter_n_args (ai); + return n == 2 && wrap_state == 1 ? 1 : n; +} + +/* Just like argv_iter, but upon first call, set *TWO_OR_MORE, + to true when there are two or more file names. We must save + in malloc'd storage the second string, and be careful to free + it if this function is called again. We deliberately do not + worry about leaking that value when exiting between the one-time + allocation and one-time free. 
*/ +static char * +wrapped_argv_iter (struct argv_iterator *ai, enum argv_iter_err *err, + bool *two_or_more) +{ + static char *f0; + static char *f1; + static enum argv_iter_err err_1; + if (wrap_state == 0) + { + f0 = argv_iter (ai, err); + if (f0) + { + f0 = xstrdup (f0); /* deliberate, possible one-time leak */ + f1 = argv_iter (ai, &err_1); + *two_or_more = f1 != NULL; + } + wrap_state = 1; + return f0; + } + else if (wrap_state == 1) + { + wrap_state = 2; + *err = err_1; + free (f0); + return f1; + } + + return argv_iter (ai, err); +} + int main (int argc, char **argv) { @@ -2170,7 +2237,9 @@ main (int argc, char **argv) int prev_optind, last_recursive; int fread_errno; intmax_t default_context; + char *files_from = NULL; FILE *fp; + exit_failure = EXIT_TROUBLE; initialize_main (&argc, &argv); set_program_name (argv[0]); @@ -2468,6 +2537,10 @@ main (int argc, char **argv) add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS); break; + case FILES0_FROM_OPTION: + files_from = optarg; + break; + case GROUP_SEPARATOR_OPTION: group_separator = optarg; break; @@ -2584,9 +2657,6 @@ main (int argc, char **argv) skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0) == out_invert); - if ((argc - optind > 1 && !no_filenames) || with_filenames) - out_file = 1; - #ifdef SET_BINARY /* Output is set to binary mode because we shouldn't convert NL to CR-LF pairs, especially when grepping binary files. */ @@ -2601,26 +2671,125 @@ main (int argc, char **argv) devices = READ_DEVICES; char *const *files; - if (optind < argc) + struct argv_iterator *ai; + if (files_from) { - files = argv + optind; - } - else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) - { - static char *const cwd_only[] = { (char *) ".", NULL }; - files = cwd_only; - omit_dot_slash = true; + /* When using --files0-from=F, you may not specify any files + on the command-line. 
*/ + if (optind < argc) + { + /* Trigger with e.g., echo a|src/grep --files0-from=- PAT x */ + error (0, 0, _("extra operand %s"), quote (argv[optind])); + error (EXIT_TROUBLE, 0, + _("file operands cannot be combined with --files0-from")); + } + + if (! (STREQ (files_from, "-") || freopen (files_from, "r", stdin))) + error (EXIT_TROUBLE, errno, _("cannot open %s for reading"), + quote (files_from)); + + ai = argv_iter_init_stream (stdin); } else { - static char *const stdin_only[] = { (char *) "-", NULL }; - files = stdin_only; + if (optind < argc) + files = argv + optind; + else if (directories == RECURSE_DIRECTORIES + && prepended < last_recursive) + { + static char *const cwd_only[] = { (char *) ".", NULL }; + files = cwd_only; + omit_dot_slash = true; + } + else + { + static char *const stdin_only[] = { (char *) "-", NULL }; + files = stdin_only; + } + ai = argv_iter_init_argv ((char **) files); } + if (!ai) + xalloc_die (); + + int i; bool status = true; - do - status &= grep_command_line_arg (*files++); - while (*files != NULL); + for (i = 0; /* */; i++) + { + bool skip_file = false; + enum argv_iter_err ai_err; + bool two_or_more IF_LINT (= true); + /* Using this wrapped argv_iter function is so that we know + when there are two or more names in the input specified via + --files0-from=F (required in order to set OUT_FILE). That + requires to cache any second name, which in turn requires to + use a wrapped_argv_iter_n_args function below, when including + the position of an offending zero-length name in such an + input file. 
*/ + char *file_name = wrapped_argv_iter (ai, &ai_err, &two_or_more); + if (i == 0 && ((two_or_more && !no_filenames) || with_filenames)) + out_file = 1; + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotearg_colon (files_from)); + status = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + assert (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | grep --files0-from=- RE */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quote (file_name)); + skip_file = true; + } + + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == NULL) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + unsigned long int file_number = wrapped_argv_iter_n_args (ai); + error (0, 0, "%s:%lu: %s", quotearg_colon (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + errseen = true; + else + status &= grep_command_line_arg (file_name); + } + argv_iter_done: + + /* No arguments on the command line is fine. That means read from stdin. + However, no arguments on the --files0-from input stream means don't + read anything. */ + if (status && !files_from && wrapped_argv_iter_n_args (ai) == 0) + status &= grep_command_line_arg ("-"); + + argv_iter_free (ai); /* We register via atexit() to test stdout. */ return errseen ? 
EXIT_TROUBLE : status; diff --git a/src/system.h b/src/system.h index 15a1abb..474c6fd 100644 --- a/src/system.h +++ b/src/system.h @@ -107,4 +107,11 @@ static _GL_UNUSED void __asan_unpoison_memory_region (void const volatile *addr, size_t size) { } #endif +/* Use this to suppress gcc's '...may be used before initialized' warnings. */ +#ifdef lint +# define IF_LINT(Code) Code +#else +# define IF_LINT(Code) /* empty */ +#endif + #endif diff --git a/tests/Makefile.am b/tests/Makefile.am index ccd0196..52adad9 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -86,6 +86,7 @@ TESTS = \ fedora \ fgrep-infloop \ file \ + files0-from.pl \ fmbtest \ foad1 \ grep-dev-null \ diff --git a/tests/files0-from.pl b/tests/files0-from.pl new file mode 100755 index 0000000..8e3f6cb --- /dev/null +++ b/tests/files0-from.pl @@ -0,0 +1,100 @@ +#!/usr/bin/perl +# Exercise grep's --files0-from option. +# FIXME: keep this file in sync with tests/misc/wc-files0-from. + +# Copyright (C) 2004-2015 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +use strict; + +(my $program_name = $0) =~ s|.*/||; + +my $prog = 'grep'; + +# Turn off localization of executable's output. +@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + +my @Tests = + ( + # invalid extra command line argument + ['f-extra-arg', '--files0-from=- RE no-such', {IN=>"a"}, {EXIT=>2}, + {ERR => "$prog: extra operand 'no-such'\n" + . 
"$prog: file operands cannot be combined with --files0-from\n" } + ], + + # missing input file + ['missing', '--files0-from=missing RE', {EXIT=>2}, + {ERR => "$prog: cannot open 'missing' for reading: " + . "No such file or directory\n"}], + + # input file name of '-' + ['minus-in-stdin', '--files0-from=- RE', '<', {IN=>{f=>'-'}}, {EXIT=>2}, + {ERR => "$prog: when reading file names from stdin, no file name of" + . " '-' allowed\n"}], + + # empty input, regular file + ['empty', '--files0-from=@AUX@ RE', {AUX=>''}, {EXIT=>1}], + + # empty input, from non-regular file + ['empty-nonreg', '--files0-from=/dev/null RE', {EXIT=>1}], + + # one NUL + ['nul-1', '--files0-from=- RE', '<', {IN=>"\0"}, {EXIT=>2}, + {ERR => "$prog: -:1: invalid zero-length file name\n"}], + + # two NULs + ['nul-2', '--files0-from=- RE', '<', {IN=>"\0\0"}, {EXIT=>2}, + {ERR => "$prog: -:1: invalid zero-length file name\n" + . "$prog: -:2: invalid zero-length file name\n"}], + + # one file name, no NUL + ['1', '--files0-from=- RE', '<', + {IN=>{f=>"g"}}, {AUX=>{g=>'RE'}}, {OUT=>"RE\n"} ], + + # one file name, with NUL + ['1a', '--files0-from=- RE', '<', + {IN=>{f=>"g\0"}}, {AUX=>{g=>'RE'}}, {OUT=>"RE\n"} ], + + # two distinct file names, no final NUL + ['2-distinct', '--files0-from=- RE', '<', + {IN=>{f=>"g\0h"}}, {AUX=>{g=>'RE'}}, {AUX=>{h=>'RE'}}, + {OUT=>"g:RE\nh:RE\n"} ], + + # two identical file names, no final NUL + ['2-identical', '--files0-from=- RE', '<', + {IN=>{f=>"g\0g"}}, {AUX=>{g=>'RE'}}, {OUT=>"g:RE\ng:RE\n"} ], + + # two identical file names, with final NUL + ['2a', '--files0-from=- RE', '<', + {IN=>{f=>"g\0g\0"}}, {AUX=>{g=>'RE'}}, {OUT=>"g:RE\ng:RE\n"} ], + + # Ensure that $prog processes FILEs following a zero-length name. 
+ ['zero-len', '--files0-from=- RE', '<', + {IN=>{f=>"\0g\0"}}, {AUX=>{g=>'RE'}}, {OUT=>"g:RE\n"}, + {ERR => "$prog: -:1: invalid zero-length file name\n"}, {EXIT=>2} ], + + # Diagnose extra file operand when using --files0-from=F + ['extra-file', '--files0-from=F RE', 'X', + {AUX=>{F=>''}}, + {ERR => "$prog: extra operand 'X'\n" . + "$prog: file operands cannot be combined with --files0-from\n"}, + {EXIT=>2} ], + ); + +my $save_temps = $ENV{DEBUG}; +my $verbose = $ENV{VERBOSE}; + +my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); +exit $fail; -- 2.3.7