grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.26-13-g641643e


From: Paul Eggert
Subject: grep branch, master, updated. v2.26-13-g641643e
Date: Sat, 19 Nov 2016 08:35:32 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  641643e609c7cd4fd2898b3423d3c7ad3106f2e4 (commit)
      from  1569ee170b8eb3e5a03ca3d7d53e71fd05fd08e5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=641643e609c7cd4fd2898b3423d3c7ad3106f2e4


commit 641643e609c7cd4fd2898b3423d3c7ad3106f2e4
Author: Paul Eggert <address@hidden>
Date:   Sat Nov 19 00:25:46 2016 -0800

    grep: -Pz no longer rejects ^, $
    
    Problem reported by Stephane Chazelas (Bug#22655).
    * NEWS: Document this.
    * doc/grep.texi (grep Programs): Warn about -Pz.
    * src/pcresearch.c (reflags): New static var.
    (multibyte_locale): Remove static var; now local to Pcompile.
    (Pcompile): Check for (? and (* too.  Set reflags instead of
    dying when problematic operators are found.
    (Pexecute): Use reflags to decide whether searches should
    be multiline.
    * tests/pcre: Test new behavior.

diff --git a/NEWS b/NEWS
index b3b5049..a95c875 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,10 @@ GNU grep NEWS                                    -*- outline -*-
 
 ** Bug fixes
 
+  grep -Pz no longer rejects patterns containing ^ and $, and is
+  more cautious about special patterns like (?-m) and (*FAIL).
+  [bug introduced in grep-2.23]
+
   grep's use of getprogname no longer causes a build failure on HP-UX.
 
 
diff --git a/doc/grep.texi b/doc/grep.texi
index fcfad42..ac821b4 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -1125,8 +1125,10 @@ expressions), separated by newlines, any of which is to be matched.
 @opindex --perl-regexp
 @cindex matching Perl-compatible regular expressions
 Interpret the pattern as a Perl-compatible regular expression (PCRE).
-This is highly experimental and
+This is highly experimental, particularly when combined with the
+@option{-z} (@option{--null-data}) option, and
 @samp{grep@ -P} may warn of unimplemented features.
+@xref{Other Options}.
 
 @end table
 
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 928c22c..9a13d97 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -32,6 +32,9 @@ enum { NSUB = 300 };
 /* Compiled internal form of a Perl regular expression.  */
 static pcre *cre;
 
+/* PCRE options used to compile the pattern.  */
+static int reflags;
+
 /* Additional information about the pattern.  */
 static pcre_extra *extra;
 
@@ -85,8 +88,6 @@ jit_exec (char const *subject, int search_bytes, int search_offset,
 /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
    string matches when that flag is used.  */
 static int empty_match[2];
-
-static bool multibyte_locale;
 #endif
 
 void
@@ -112,18 +113,19 @@ Pcompile (char const *pattern, size_t size)
   char *n = re;
   char const *p;
   char const *pnul;
+  bool multibyte_locale = 1 < MB_CUR_MAX;
 
-  if (1 < MB_CUR_MAX)
+  if (multibyte_locale)
     {
       if (! localeinfo.using_utf8)
         die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
-      multibyte_locale = true;
       flags |= PCRE_UTF8;
     }
 
-  /* FIXME: Remove these restrictions.  */
+  /* FIXME: Remove this restriction.  */
   if (memchr (pattern, '\n', size))
     die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
+
   if (! eolbyte)
     {
       bool escaped = false;
@@ -133,9 +135,12 @@ Pcompile (char const *pattern, size_t size)
           escaped = after_unescaped_left_bracket = false;
         else
           {
-            if (*p == '$' || (*p == '^' && !after_unescaped_left_bracket))
-              die (EXIT_TROUBLE, 0,
-                   _("unescaped ^ or $ not supported with -Pz"));
+            if (*p == '$' || (*p == '^' && !after_unescaped_left_bracket)
+                || (*p == '(' && (p[1] == '?' || p[1] == '*')))
+              {
+                flags = (flags & ~ PCRE_MULTILINE) | PCRE_DOLLAR_ENDONLY;
+                break;
+              }
             escaped = *p == '\\';
             after_unescaped_left_bracket = *p == '[';
           }
@@ -217,12 +222,15 @@ Pexecute (char *buf, size_t size, size_t *match_size,
      error.  */
   char const *subject = buf;
 
-  /* If the input is unibyte or is free of encoding errors a multiline search is
+  /* If the pattern has no problematic operators and the input is
+     unibyte or is free of encoding errors, a multiline search is
      typically more efficient.  Otherwise, a single-line search is
-     typically faster, so that pcre_exec doesn't waste time validating
-     the entire input buffer.  */
-  bool multiline = true;
-  if (multibyte_locale)
+     either less confusing because the problematic operators are
+     interpreted more naturally, or it is typically faster because
+     pcre_exec doesn't waste time validating the entire input
+     buffer.  */
+  bool multiline = (reflags & PCRE_MULTILINE) != 0;
+  if (multiline && (reflags & PCRE_UTF8) != 0)
     {
       multiline = ! buf_has_encoding_errors (buf, size - 1);
       buf[size - 1] = eolbyte;
diff --git a/tests/pcre b/tests/pcre
index 8f3d9a4..653ef22 100755
--- a/tests/pcre
+++ b/tests/pcre
@@ -13,8 +13,9 @@ require_pcre_
 fail=0
 
 echo | grep -P '\s*$' || fail=1
-echo | returns_ 2 grep -zP '\s$' || fail=1
+echo | grep -zP '\s$' || fail=1
 echo '.ab' | returns_ 1 grep -Pwx ab || fail=1
 echo x | grep -Pz '[^a]' || fail=1
+printf 'x\n\0' | returns_ 1 grep -zP 'x$' || fail=1
 
 Exit $fail

-----------------------------------------------------------------------

Summary of changes:
 NEWS             |    4 ++++
 doc/grep.texi    |    4 +++-
 src/pcresearch.c |   34 +++++++++++++++++++++-------------
 tests/pcre       |    3 ++-
 4 files changed, 30 insertions(+), 15 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]