From e5bfd1d64ed1ceb1ec6423e8c89aa936e24109e5 Mon Sep 17 00:00:00 2001
From: Jim Meyering
Date: Sat, 27 Jun 2015 08:44:31 -0700
Subject: [PATCH] accept new option: --files0-from=F
* src/grep.c: Include <assert.h>, "argv-iter.h" and "quotearg.h".
(usage): Describe the new option, and adjust the `Usage':
with this option, no FILE may be specified on the command line.
(wrap_state): New file-scoped global.
(wrapped_argv_iter_n_args, wrapped_argv_iter): New functions.
(main): Handle the new option.
* bootstrap.conf (gnulib_modules): Add argv-iter.
* configure.ac: Set the AM_CONDITIONAL variable, HAVE_PERL.
* tests/files0-from.pl: New file.
* tests/Makefile.am (TESTS): Add it.
* NEWS (New features): Mention it.
---
NEWS | 5 ++
bootstrap.conf | 1 +
src/grep.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++-----
src/system.h | 7 ++
tests/Makefile.am | 1 +
tests/files0-from.pl | 100 +++++++++++++++++++++++++
6 files changed, 301 insertions(+), 18 deletions(-)
create mode 100755 tests/files0-from.pl
diff --git a/NEWS b/NEWS
index 35c4aad..393ebf4 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,11 @@ GNU grep NEWS -*- outline -*-
* Noteworthy changes in release ?.? (????-??-??) [?]
+** New features
+
+ grep accepts a new option --files0-from=FILE, where FILE contains a
+ list of NUL-terminated file names.
+
** Improvements
When building grep, 'configure' now uses PCRE's pkg-config module for
diff --git a/bootstrap.conf b/bootstrap.conf
index 7842928..5e96142 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -25,6 +25,7 @@ gnulib_modules='
alloca
announce-gen
argmatch
+argv-iter
binary-io
btowc
c-ctype
diff --git a/src/grep.c b/src/grep.c
index a735ea5..6cd9f6a 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -26,9 +26,11 @@
#include
#include
#include
+#include <assert.h>
#include "system.h"
#include "argmatch.h"
+#include "argv-iter.h"
#include "c-ctype.h"
#include "closeout.h"
#include "colorize.h"
@@ -43,6 +45,7 @@
#include "progname.h"
#include "propername.h"
#include "quote.h"
+#include "quotearg.h"
#include "safe-read.h"
#include "search.h"
#include "version-etc.h"
@@ -310,6 +313,7 @@ enum
EXCLUDE_DIRECTORY_OPTION,
EXCLUDE_OPTION,
EXCLUDE_FROM_OPTION,
+ FILES0_FROM_OPTION,
GROUP_SEPARATOR_OPTION,
INCLUDE_OPTION,
LINE_BUFFERED_OPTION,
@@ -340,6 +344,7 @@ static struct option const long_options[] =
{"file", required_argument, NULL, 'f'},
{"files-with-matches", no_argument, NULL, 'l'},
{"files-without-match", no_argument, NULL, 'L'},
+ {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
{"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
{"help", no_argument, &show_help, 1},
{"include", required_argument, NULL, INCLUDE_OPTION},
@@ -1753,8 +1758,11 @@ usage (int status)
{
if (status != 0)
{
- fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
- program_name);
+ fprintf (stderr, _("\
+Usage: %s [OPTION]... PATTERN [FILE]...\n\
+ or: %s [OPTION]... --files0-from=F PATTERN\n\
+"),
+ program_name, program_name);
fprintf (stderr, _("Try '%s --help' for more information.\n"),
program_name);
}
@@ -1783,6 +1791,9 @@ Regexp selection and interpretation:\n"), program_name);
printf (_("\
\n\
Miscellaneous:\n\
+ --files0-from=F read input from the files specified by\n\
+ NUL-terminated names in file F;\n\
+ If F is - then read names from standard input\n\
-s, --no-messages suppress error messages\n\
-v, --invert-match select non-matching lines\n\
-V, --version display version information and exit\n\
@@ -2159,6 +2170,62 @@ fgrep_to_grep_pattern (size_t len, char const *keys,
*new_len = p - *new_keys;
}
+/* This global and the following two wrapper functions exist solely
+   to support reading file names from F, given --files0-from=F.  When
+   processing the first file we must know whether F holds at least one
+   more name (to decide whether to print the "FILENAME: " prefix), yet
+   the argv-iter module provides no way to peek ahead into that stream;
+   wrapped_argv_iter does the peeking.  wrapped_argv_iter_n_args exists
+   because the record index printed when diagnosing an empty file name
+   would be one too large right after that peek.  WRAP_STATE is 0 before
+   the first wrapped_argv_iter call, 1 while the peeked-at second result
+   is still pending, and 2 once that result has been handed out.  */
+static unsigned int wrap_state = 0;
+
+/* Like argv_iter_n_args, but report 1 instead of 2 while WRAP_STATE is 1.  */
+static size_t
+wrapped_argv_iter_n_args (struct argv_iterator const *ai)
+{
+  size_t count = argv_iter_n_args (ai);
+  return (wrap_state == 1 && count == 2) ? 1 : count;
+}
+
+/* Just like argv_iter, but on the first call also fetch one more name
+   and set *TWO_OR_MORE to whether that second name exists; it is set
+   to false when there is not even a first name.  The first name is
+   returned as a malloc'd copy, which is freed on the second call; the
+   second call returns the saved second name and its error code.  We
+   deliberately do not worry about leaking the copy on an early exit.  */
+static char *
+wrapped_argv_iter (struct argv_iterator *ai, enum argv_iter_err *err,
+                   bool *two_or_more)
+{
+  static char *f0;
+  static char *f1;
+  static enum argv_iter_err err_1;
+  if (wrap_state == 0)
+    {
+      f0 = argv_iter (ai, err);
+      if (f0)
+        {
+          f0 = xstrdup (f0); /* deliberate, possible one-time leak */
+          f1 = argv_iter (ai, &err_1);
+        }
+      *two_or_more = f1 != NULL; /* f1 is still NULL if there was no f0 */
+      wrap_state = 1;
+      return f0;
+    }
+  else if (wrap_state == 1)
+    {
+      wrap_state = 2;
+      *err = err_1;
+      free (f0);
+      return f1;
+    }
+
+  return argv_iter (ai, err);
+}
+
int
main (int argc, char **argv)
{
@@ -2170,7 +2237,9 @@ main (int argc, char **argv)
int prev_optind, last_recursive;
int fread_errno;
intmax_t default_context;
+ char *files_from = NULL;
FILE *fp;
+
exit_failure = EXIT_TROUBLE;
initialize_main (&argc, &argv);
set_program_name (argv[0]);
@@ -2468,6 +2537,10 @@ main (int argc, char **argv)
add_exclude (excluded_directory_patterns, optarg, EXCLUDE_WILDCARDS);
break;
+ case FILES0_FROM_OPTION:
+ files_from = optarg;
+ break;
+
case GROUP_SEPARATOR_OPTION:
group_separator = optarg;
break;
@@ -2584,9 +2657,6 @@ main (int argc, char **argv)
skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
== out_invert);
- if ((argc - optind > 1 && !no_filenames) || with_filenames)
- out_file = 1;
-
#ifdef SET_BINARY
/* Output is set to binary mode because we shouldn't convert
NL to CR-LF pairs, especially when grepping binary files. */
@@ -2601,26 +2671,125 @@ main (int argc, char **argv)
devices = READ_DEVICES;
char *const *files;
- if (optind < argc)
+ struct argv_iterator *ai;
+ if (files_from)
{
- files = argv + optind;
- }
- else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
- {
- static char *const cwd_only[] = { (char *) ".", NULL };
- files = cwd_only;
- omit_dot_slash = true;
+ /* When using --files0-from=F, you may not specify any files
+ on the command-line. */
+ if (optind < argc)
+ {
+ /* Trigger with e.g., echo a|src/grep --files0-from=- PAT x */
+ error (0, 0, _("extra operand %s"), quote (argv[optind]));
+ error (EXIT_TROUBLE, 0,
+ _("file operands cannot be combined with --files0-from"));
+ }
+
+ if (! (STREQ (files_from, "-") || freopen (files_from, "r", stdin)))
+ error (EXIT_TROUBLE, errno, _("cannot open %s for reading"),
+ quote (files_from));
+
+ ai = argv_iter_init_stream (stdin);
}
else
{
- static char *const stdin_only[] = { (char *) "-", NULL };
- files = stdin_only;
+ if (optind < argc)
+ files = argv + optind;
+ else if (directories == RECURSE_DIRECTORIES
+ && prepended < last_recursive)
+ {
+ static char *const cwd_only[] = { (char *) ".", NULL };
+ files = cwd_only;
+ omit_dot_slash = true;
+ }
+ else
+ {
+ static char *const stdin_only[] = { (char *) "-", NULL };
+ files = stdin_only;
+ }
+ ai = argv_iter_init_argv ((char **) files);
}
+ if (!ai)
+ xalloc_die ();
+
+ int i;
bool status = true;
- do
- status &= grep_command_line_arg (*files++);
- while (*files != NULL);
+ for (i = 0; /* */; i++)
+ {
+ bool skip_file = false;
+ enum argv_iter_err ai_err;
+ bool two_or_more IF_LINT (= true);
+ /* Using this wrapped argv_iter function is so that we know
+ when there are two or more names in the input specified via
+ --files0-from=F (required in order to set OUT_FILE). That
+ requires to cache any second name, which in turn requires to
+ use a wrapped_argv_iter_n_args function below, when including
+ the position of an offending zero-length name in such an
+ input file. */
+ char *file_name = wrapped_argv_iter (ai, &ai_err, &two_or_more);
+ if (i == 0 && ((two_or_more && !no_filenames) || with_filenames))
+ out_file = 1;
+ if (!file_name)
+ {
+ switch (ai_err)
+ {
+ case AI_ERR_EOF:
+ goto argv_iter_done;
+ case AI_ERR_READ:
+ error (0, errno, _("%s: read error"),
+ quotearg_colon (files_from));
+ status = false;
+ goto argv_iter_done;
+ case AI_ERR_MEM:
+ xalloc_die ();
+ default:
+ assert (!"unexpected error code from argv_iter");
+ }
+ }
+ if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
+ {
+ /* Give a better diagnostic in an unusual case:
+ printf - | grep --files0-from=- RE */
+ error (0, 0, _("when reading file names from stdin, "
+ "no file name of %s allowed"),
+ quote (file_name));
+ skip_file = true;
+ }
+
+ if (!file_name[0])
+ {
+ /* Diagnose a zero-length file name. When it's one
+ among many, knowing the record number may help.
+ FIXME: currently print the record number only with
+ --files0-from=FILE. Maybe do it for argv, too? */
+ if (files_from == NULL)
+ error (0, 0, "%s", _("invalid zero-length file name"));
+ else
+ {
+ /* Using the standard 'filename:line-number:' prefix here is
+ not totally appropriate, since NUL is the separator, not NL,
+ but it might be better than nothing. */
+ unsigned long int file_number = wrapped_argv_iter_n_args (ai);
+ error (0, 0, "%s:%lu: %s", quotearg_colon (files_from),
+ file_number, _("invalid zero-length file name"));
+ }
+ skip_file = true;
+ }
+
+ if (skip_file)
+ errseen = true;
+ else
+ status &= grep_command_line_arg (file_name);
+ }
+ argv_iter_done:
+
+ /* No arguments on the command line is fine. That means read from stdin.
+ However, no arguments on the --files0-from input stream means don't
+ read anything. */
+ if (status && !files_from && wrapped_argv_iter_n_args (ai) == 0)
+ status &= grep_command_line_arg ("-");
+
+ argv_iter_free (ai);
/* We register via atexit() to test stdout. */
return errseen ? EXIT_TROUBLE : status;
diff --git a/src/system.h b/src/system.h
index 15a1abb..474c6fd 100644
--- a/src/system.h
+++ b/src/system.h
@@ -107,4 +107,11 @@ static _GL_UNUSED void
__asan_unpoison_memory_region (void const volatile *addr, size_t size) { }
#endif
+/* Expand to CODE only if 'lint' is defined; suppresses gcc's '...may be used before initialized' warnings. */
+#ifdef lint
+# define IF_LINT(Code) Code
+#else
+# define IF_LINT(Code) /* empty */
+#endif
+
#endif
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ccd0196..52adad9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -86,6 +86,7 @@ TESTS = \
fedora \
fgrep-infloop \
file \
+ files0-from.pl \
fmbtest \
foad1 \
grep-dev-null \
diff --git a/tests/files0-from.pl b/tests/files0-from.pl
new file mode 100755
index 0000000..8e3f6cb
--- /dev/null
+++ b/tests/files0-from.pl
@@ -0,0 +1,100 @@
+#!/usr/bin/perl
+# Exercise grep's --files0-from option.
+# FIXME: keep this file in sync with tests/misc/wc-files0-from.
+
+# Copyright (C) 2004-2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+
+my $prog = 'grep';
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my @Tests =
+ (
+ # invalid extra command line argument
+ ['f-extra-arg', '--files0-from=- RE no-such', {IN=>"a"}, {EXIT=>2},
+ {ERR => "$prog: extra operand 'no-such'\n"
+ . "$prog: file operands cannot be combined with --files0-from\n" }
+ ],
+
+ # missing input file
+ ['missing', '--files0-from=missing RE', {EXIT=>2},
+ {ERR => "$prog: cannot open 'missing' for reading: "
+ . "No such file or directory\n"}],
+
+ # input file name of '-'
+ ['minus-in-stdin', '--files0-from=- RE', '<', {IN=>{f=>'-'}}, {EXIT=>2},
+ {ERR => "$prog: when reading file names from stdin, no file name of"
+ . " '-' allowed\n"}],
+
+ # empty input, regular file
+ ['empty', '--files0-from=@AUX@ RE', {AUX=>''}, {EXIT=>1}],
+
+ # empty input, from non-regular file
+ ['empty-nonreg', '--files0-from=/dev/null RE', {EXIT=>1}],
+
+ # one NUL
+ ['nul-1', '--files0-from=- RE', '<', {IN=>"\0"}, {EXIT=>2},
+ {ERR => "$prog: -:1: invalid zero-length file name\n"}],
+
+ # two NULs
+ ['nul-2', '--files0-from=- RE', '<', {IN=>"\0\0"}, {EXIT=>2},
+ {ERR => "$prog: -:1: invalid zero-length file name\n"
+ . "$prog: -:2: invalid zero-length file name\n"}],
+
+ # one file name, no NUL
+ ['1', '--files0-from=- RE', '<',
+ {IN=>{f=>"g"}}, {AUX=>{g=>'RE'}}, {OUT=>"RE\n"} ],
+
+ # one file name, with NUL
+ ['1a', '--files0-from=- RE', '<',
+ {IN=>{f=>"g\0"}}, {AUX=>{g=>'RE'}}, {OUT=>"RE\n"} ],
+
+ # two distinct file names, no final NUL
+ ['2-distinct', '--files0-from=- RE', '<',
+ {IN=>{f=>"g\0h"}}, {AUX=>{g=>'RE'}}, {AUX=>{h=>'RE'}},
+ {OUT=>"g:RE\nh:RE\n"} ],
+
+ # two identical file names, no final NUL
+ ['2-identical', '--files0-from=- RE', '<',
+ {IN=>{f=>"g\0g"}}, {AUX=>{g=>'RE'}}, {OUT=>"g:RE\ng:RE\n"} ],
+
+ # two identical file names, with final NUL
+ ['2a', '--files0-from=- RE', '<',
+ {IN=>{f=>"g\0g\0"}}, {AUX=>{g=>'RE'}}, {OUT=>"g:RE\ng:RE\n"} ],
+
+ # Ensure that $prog processes FILEs following a zero-length name.
+ ['zero-len', '--files0-from=- RE', '<',
+ {IN=>{f=>"\0g\0"}}, {AUX=>{g=>'RE'}}, {OUT=>"g:RE\n"},
+ {ERR => "$prog: -:1: invalid zero-length file name\n"}, {EXIT=>2} ],
+
+ # Diagnose extra file operand when using --files0-from=F
+ ['extra-file', '--files0-from=F RE', 'X',
+ {AUX=>{F=>''}},
+ {ERR => "$prog: extra operand 'X'\n" .
+ "$prog: file operands cannot be combined with --files0-from\n"},
+ {EXIT=>2} ],
+ );
+
+my $save_temps = $ENV{DEBUG};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
--
2.3.7