grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.20-38-g9ea9254


From: Paul Eggert
Subject: grep branch, master, updated. v2.20-38-g9ea9254
Date: Wed, 17 Sep 2014 01:24:21 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  9ea9254ea58456b84ed2f0c1481ca91cdd325bf7 (commit)
       via  af3572e2651379566441c9d718dec7e809d3810d (commit)
       via  a19e4898d4fa7808fbccb0c9b0ab2cd77e3abcbe (commit)
       via  b5aa641ae5d2f4eeb05f0a09de9d178f8b678cb5 (commit)
       via  dfff75a432eec187e4c5f2c6ce99ebdadb1089c8 (commit)
       via  6e319a818ed7b15b452ed2baab2f6a38d42fd1fe (commit)
       via  cd36abd46c5e0768606979ea75a51732062f5624 (commit)
       via  564a06e761ac06c4a0bcd91ce5060118d35bf912 (commit)
       via  55a0c73874bcfaa73948fd034fb34e117266d623 (commit)
       via  77262184e3f573206a88374a8361cf3363122fb0 (commit)
      from  845b366bef3596b33194d89a22d47e64680293b2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=9ea9254ea58456b84ed2f0c1481ca91cdd325bf7


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=af3572e2651379566441c9d718dec7e809d3810d


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=a19e4898d4fa7808fbccb0c9b0ab2cd77e3abcbe


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=b5aa641ae5d2f4eeb05f0a09de9d178f8b678cb5


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=dfff75a432eec187e4c5f2c6ce99ebdadb1089c8


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=6e319a818ed7b15b452ed2baab2f6a38d42fd1fe


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=cd36abd46c5e0768606979ea75a51732062f5624


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=564a06e761ac06c4a0bcd91ce5060118d35bf912


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=55a0c73874bcfaa73948fd034fb34e117266d623


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=77262184e3f573206a88374a8361cf3363122fb0


commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date:   Mon Sep 15 18:33:19 2014 -0700

    grep: fix -P speedup bug with empty match
    
    * src/pcresearch.c (NSUB): New top-level constant, replacing
    'nsub' within Pexecute.
    (Pcompile, Pexecute): Use it.
    (Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
    match failure.
    * tests/pcre-invalid-utf8-input: Test for this bug.

diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
    string matches when that flag is used.  */
 static int empty_match[2];
 
+/* This must be at least 2; everything after that is for performance
+   in pcre_exec.  */
+enum { NSUB = 300 };
+
 void
 Pcompile (char const *pattern, size_t size)
 {
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
 # endif
   free (re);
 
-  empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
-  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+  int sub[NSUB];
+  empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+                                  PCRE_NOTBOL, sub, NSUB);
+  empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
 #endif /* HAVE_LIBPCRE */
 }
 
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
   error (EXIT_TROUBLE, 0, _("internal error"));
   return -1;
 #else
-  /* This array must have at least two elements; everything after that
-     is just for performance improvement in pcre_exec.  */
-  enum { nsub = 300 };
-  int sub[nsub];
-
+  int sub[NSUB];
   char const *p = start_ptr ? start_ptr : buf;
   bool bol = p[-1] == eolbyte;
   char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
         {
           int options = bol ? 0 : PCRE_NOTBOL;
           int valid_bytes;
-          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+          e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
           if (e != PCRE_ERROR_BADUTF8)
             break;
           valid_bytes = sub[0];
-          e = (valid_bytes == 0
-               ? empty_match[bol]
-               : pcre_exec (cre, extra, p, valid_bytes, 0,
-                            options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
-                            sub, nsub));
+          if (valid_bytes == 0)
+            {
+              sub[1] = 0;
+              e = empty_match[bol];
+            }
+          else
+            e = pcre_exec (cre, extra, p, valid_bytes, 0,
+                           options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+                           sub, NSUB);
           if (e != PCRE_ERROR_NOMATCH)
             break;
           p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
 LC_ALL=en_US.UTF-8 grep -P 'k$' in
 test $? -eq 1 || fail=1
 
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
 Exit $fail

-----------------------------------------------------------------------

Summary of changes:
 NEWS                            |    4 +
 bootstrap.conf                  |    1 +
 cfg.mk                          |    4 +
 configure.ac                    |    5 +
 doc/grep.texi                   |    3 +-
 src/grep.c                      |  398 ++++++++++++++++++++-------------------
 src/grep.h                      |    8 +-
 src/pcresearch.c                |   51 ++++--
 src/search.h                    |   19 ++
 src/searchutils.c               |   29 ++--
 src/system.h                    |    9 +-
 tests/backref-multibyte-slow    |    2 +-
 tests/high-bit-range            |    2 +-
 tests/invalid-multibyte-infloop |   14 ++-
 tests/pcre-invalid-utf8-input   |    5 +
 15 files changed, 323 insertions(+), 231 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]