grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v3.3-82-g9393b97


From: Paul Eggert
Subject: grep branch, master, updated. v3.3-82-g9393b97
Date: Mon, 7 Sep 2020 22:49:37 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master, has been updated
       via  9393b977015bf7944cec1d71ad3972c101bdb4b8 (commit)
       via  0ede35a6cd21093560de8bd9843263ba199abf1f (commit)
       via  71b5c685d0dd3e9b0298e1a9c37b32fbedece340 (commit)
       via  33e4602c96e639ec7d56b92ffe3614aa700d3d76 (commit)
      from  7ded8efd721ce2abf2b781931e0a0bdd46b156d7 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=9393b977015bf7944cec1d71ad3972c101bdb4b8


commit 9393b977015bf7944cec1d71ad3972c101bdb4b8
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Mon Sep 7 19:44:21 2020 -0700

    Prefer rawmemchr to memchr when it’s easy
    
    * bootstrap.conf (gnulib_modules): Add rawmemchr.
    * src/dfasearch.c (GEAcompile, EGexecute):
    * src/grep.c (update_patterns, prpending, prtext):
    * src/kwsearch.c (Fcompile, Fexecute):
    * src/pcresearch.c (Pcompile, Pexecute):
    Simplify (and presumably speed up a little) by using rawmemchr
    with a sentinel, instead of using memchr.

diff --git a/bootstrap.conf b/bootstrap.conf
index fceb318..4268623 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -73,6 +73,7 @@ openat-safer
 perl
 propername
 quote
+rawmemchr
 readme-release
 realloc-gnu
 regex
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 256cd39..4d3f4b2 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -174,6 +174,10 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
   return false;
 }
 
+/* Compile PATTERN, containing SIZE bytes that are followed by '\n'.
+   SYNTAX_BITS specifies whether PATTERN uses style -G, -E, or -A.
+   Return a description of the compiled pattern.  */
+
 void *
 GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 {
@@ -213,15 +217,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 
   do
     {
-      size_t len;
-      char const *sep = memchr (p, '\n', patlim - p);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-        }
-      else
-        len = patlim - p;
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
 
@@ -247,7 +244,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
       if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
         compilation_failed = true;
 
-      p = sep;
+      p = sep + 1;
       lineno++;
 
       if (backref)
@@ -256,12 +253,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
           prev = p;
         }
     }
-  while (p);
+  while (p <= patlim);
 
   if (compilation_failed)
     exit (EXIT_TROUBLE);
 
-  if (prev != NULL)
+  if (prev <= patlim)
     {
       if (pattern < prev)
         {
@@ -383,14 +380,19 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                  greater of the latter two values; this temporarily prefers
                  the DFA to KWset.  */
               exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
-              end = ((exact_kwset_match || !dfafast
-                      || MAX (16, match - beg) < (match - prev_beg) >> 2)
-                     ? match
-                     : MAX (16, match - beg) < (buflim - prev_beg) >> 2
-                     ? prev_beg + 4 * MAX (16, match - beg)
-                     : buflim);
-              end = memchr (end, eol, buflim - end);
-              end = end ? end + 1 : buflim;
+              if (exact_kwset_match || !dfafast
+                  || MAX (16, match - beg) < (match - prev_beg) >> 2)
+                {
+                  end = rawmemchr (match, eol);
+                  end++;
+                }
+              else if (MAX (16, match - beg) < (buflim - prev_beg) >> 2)
+                {
+                  end = rawmemchr (prev_beg + 4 * MAX (16, match - beg), eol);
+                  end++;
+                }
+              else
+                end = buflim;
 
               if (exact_kwset_match)
                 {
@@ -425,8 +427,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                   beg++;
                   dfa_beg = beg;
                 }
-              end = memchr (next_beg, eol, buflim - next_beg);
-              end = end ? end + 1 : buflim;
+              end = rawmemchr (next_beg, eol);
+              end++;
 
               count = 0;
             }
@@ -446,8 +448,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
               beg = memrchr (buf, eol, next_beg - buf);
               beg++;
             }
-          end = memchr (next_beg, eol, buflim - next_beg);
-          end = end ? end + 1 : buflim;
+          end = rawmemchr (next_beg, eol);
+          end++;
 
           /* Successful, no back-references encountered! */
           if (!backref)
diff --git a/src/grep.c b/src/grep.c
index ce2f291..d058a76 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -164,7 +164,7 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
   ptrdiff_t patsize;
   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
     {
-      char const *patend = memchr (src, '\n', srclim - src);
+      char const *patend = rawmemchr (src, '\n');
       patsize = patend + 1 - src;
       memmove (dst, src, patsize);
 
@@ -1104,8 +1104,7 @@ static void
 nlscan (char const *lim)
 {
   size_t newlines = 0;
-  char const *beg;
-  for (beg = lastnl; beg < lim; beg++)
+  for (char const *beg = lastnl; beg < lim; beg++)
     {
       beg = memchr (beg, eolbyte, lim - beg);
       if (!beg)
@@ -1353,7 +1352,7 @@ prpending (char const *lim)
     lastout = bufbeg;
   for (; 0 < pending && lastout < lim; pending--)
     {
-      char *nl = memchr (lastout, eolbyte, lim - lastout);
+      char *nl = rawmemchr (lastout, eolbyte);
       prline (lastout, nl + 1, SEP_CHAR_REJECTED);
     }
 }
@@ -1394,7 +1393,7 @@ prtext (char *beg, char *lim)
 
       while (p < beg)
         {
-          char *nl = memchr (p, eol, beg - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           prline (p, nl, SEP_CHAR_REJECTED);
           p = nl;
@@ -1407,7 +1406,7 @@ prtext (char *beg, char *lim)
       /* One or more lines are output.  */
       for (n = 0; p < lim && n < outleft; n++)
         {
-          char *nl = memchr (p, eol, lim - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           if (!out_quiet)
             prline (p, nl, SEP_CHAR_SELECTED);
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 6f6d4d0..7081060 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -43,14 +43,13 @@ struct kwsearch
   void *re;
 };
 
-/* Compile the -F style PATTERN, containing SIZE bytes.  Return a
-   description of the compiled pattern.  */
+/* Compile the -F style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
 Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
   kwset_t kwset;
-  ptrdiff_t total = size;
   char *buf = NULL;
   size_t bufalloc = 0;
 
@@ -59,23 +58,12 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
   char const *p = pattern;
   do
     {
-      ptrdiff_t len;
-      char const *sep = memchr (p, '\n', total);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-          total -= (len + 1);
-        }
-      else
-        {
-          len = total;
-          total = 0;
-        }
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       if (match_lines)
         {
-          if (eolbyte == '\n' && pattern < p && sep)
+          if (eolbyte == '\n' && pattern < p)
             p--;
           else
             {
@@ -94,9 +82,9 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
         }
       kwsincr (kwset, p, len);
 
-      p = sep;
+      p = sep + 1;
     }
-  while (p);
+  while (p <= pattern + size);
 
   free (buf);
   ptrdiff_t words = kwswords (kwset);
@@ -259,8 +247,14 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                                                kwsearch->size,
                                                RE_SYNTAX_GREP);
                   }
-                end = memchr (beg + len, eol, (buf + size) - (beg + len));
-                end = end ? end + 1 : buf + size;
+                if (beg + len < buf + size)
+                  {
+                    end = rawmemchr (beg + len, eol);
+                    end++;
+                  }
+                else
+                  end = buf + size;
+
                 if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
                     != (size_t) -1)
                   goto success_match_words;
@@ -285,8 +279,13 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
   return -1;
 
  success:
-  end = memchr (beg + len, eol, (buf + size) - (beg + len));
-  end = end ? end + 1 : buf + size;
+  if (beg + len < buf + size)
+    {
+      end = rawmemchr (beg + len, eol);
+      end++;
+    }
+  else
+    end = buf + size;
  success_match_words:
   beg = memrchr (buf, eol, beg - buf);
   beg = beg ? beg + 1 : buf;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 15a6a59..2fcbf8e 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -107,6 +107,9 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
     }
 }
 
+/* Compile the -P style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
+
 void *
 Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
@@ -120,7 +123,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
                          sizeof xprefix - 1 + sizeof xsuffix - 1);
   char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
   int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
-  char const *patlim = pattern + size;
+  char *patlim = pattern + size;
   char *n = re;
   char const *p;
   char const *pnul;
@@ -134,7 +137,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
     }
 
   /* FIXME: Remove this restriction.  */
-  if (memchr (pattern, '\n', size))
+  if (rawmemchr (pattern, '\n') != patlim)
     die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
 
   *n = '\0';
@@ -148,7 +151,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
      replace each NUL byte in the pattern with the four characters
      "\000", removing a preceding backslash if there are an odd
      number of backslashes before the NUL.  */
-  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+  *patlim = '\0';
+  for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
     {
       memcpy (n, p, pnul - p);
       n += pnul - p;
@@ -158,10 +162,10 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
       strcpy (n, "\\000");
       n += 4;
     }
-
-  memcpy (n, p, patlim - p);
+  memcpy (n, p, patlim - p + 1);
   n += patlim - p;
-  *n = '\0';
+  *patlim = '\n';
+
   if (match_words)
     strcpy (n, wsuffix);
   if (match_lines)
@@ -219,7 +223,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
          PCRE_MULTILINE for performance, the performance wasn't always
          better and the correctness issues were too puzzling.  See
          Bug#22655.  */
-      line_end = memchr (p, eolbyte, buf + size - p);
+      line_end = rawmemchr (p, eolbyte);
       if (INT_MAX < line_end - p)
         die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
 

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=0ede35a6cd21093560de8bd9843263ba199abf1f


commit 9393b977015bf7944cec1d71ad3972c101bdb4b8
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Mon Sep 7 19:44:21 2020 -0700

    Prefer rawmemchr to memchr when it’s easy
    
    * bootstrap.conf (gnulib_modules): Add rawmemchr.
    * src/dfasearch.c (GEAcompile, EGexecute):
    * src/grep.c (update_patterns, prpending, prtext):
    * src/kwsearch.c (Fcompile, Fexecute):
    * src/pcresearch.c (Pcompile, Pexecute):
    Simplify (and presumably speed up a little) by using rawmemchr
    with a sentinel, instead of using memchr.

diff --git a/bootstrap.conf b/bootstrap.conf
index fceb318..4268623 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -73,6 +73,7 @@ openat-safer
 perl
 propername
 quote
+rawmemchr
 readme-release
 realloc-gnu
 regex
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 256cd39..4d3f4b2 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -174,6 +174,10 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
   return false;
 }
 
+/* Compile PATTERN, containing SIZE bytes that are followed by '\n'.
+   SYNTAX_BITS specifies whether PATTERN uses style -G, -E, or -A.
+   Return a description of the compiled pattern.  */
+
 void *
 GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 {
@@ -213,15 +217,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 
   do
     {
-      size_t len;
-      char const *sep = memchr (p, '\n', patlim - p);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-        }
-      else
-        len = patlim - p;
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
 
@@ -247,7 +244,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
       if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
         compilation_failed = true;
 
-      p = sep;
+      p = sep + 1;
       lineno++;
 
       if (backref)
@@ -256,12 +253,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
           prev = p;
         }
     }
-  while (p);
+  while (p <= patlim);
 
   if (compilation_failed)
     exit (EXIT_TROUBLE);
 
-  if (prev != NULL)
+  if (prev <= patlim)
     {
       if (pattern < prev)
         {
@@ -383,14 +380,19 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                  greater of the latter two values; this temporarily prefers
                  the DFA to KWset.  */
               exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
-              end = ((exact_kwset_match || !dfafast
-                      || MAX (16, match - beg) < (match - prev_beg) >> 2)
-                     ? match
-                     : MAX (16, match - beg) < (buflim - prev_beg) >> 2
-                     ? prev_beg + 4 * MAX (16, match - beg)
-                     : buflim);
-              end = memchr (end, eol, buflim - end);
-              end = end ? end + 1 : buflim;
+              if (exact_kwset_match || !dfafast
+                  || MAX (16, match - beg) < (match - prev_beg) >> 2)
+                {
+                  end = rawmemchr (match, eol);
+                  end++;
+                }
+              else if (MAX (16, match - beg) < (buflim - prev_beg) >> 2)
+                {
+                  end = rawmemchr (prev_beg + 4 * MAX (16, match - beg), eol);
+                  end++;
+                }
+              else
+                end = buflim;
 
               if (exact_kwset_match)
                 {
@@ -425,8 +427,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                   beg++;
                   dfa_beg = beg;
                 }
-              end = memchr (next_beg, eol, buflim - next_beg);
-              end = end ? end + 1 : buflim;
+              end = rawmemchr (next_beg, eol);
+              end++;
 
               count = 0;
             }
@@ -446,8 +448,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
               beg = memrchr (buf, eol, next_beg - buf);
               beg++;
             }
-          end = memchr (next_beg, eol, buflim - next_beg);
-          end = end ? end + 1 : buflim;
+          end = rawmemchr (next_beg, eol);
+          end++;
 
           /* Successful, no back-references encountered! */
           if (!backref)
diff --git a/src/grep.c b/src/grep.c
index ce2f291..d058a76 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -164,7 +164,7 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
   ptrdiff_t patsize;
   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
     {
-      char const *patend = memchr (src, '\n', srclim - src);
+      char const *patend = rawmemchr (src, '\n');
       patsize = patend + 1 - src;
       memmove (dst, src, patsize);
 
@@ -1104,8 +1104,7 @@ static void
 nlscan (char const *lim)
 {
   size_t newlines = 0;
-  char const *beg;
-  for (beg = lastnl; beg < lim; beg++)
+  for (char const *beg = lastnl; beg < lim; beg++)
     {
       beg = memchr (beg, eolbyte, lim - beg);
       if (!beg)
@@ -1353,7 +1352,7 @@ prpending (char const *lim)
     lastout = bufbeg;
   for (; 0 < pending && lastout < lim; pending--)
     {
-      char *nl = memchr (lastout, eolbyte, lim - lastout);
+      char *nl = rawmemchr (lastout, eolbyte);
       prline (lastout, nl + 1, SEP_CHAR_REJECTED);
     }
 }
@@ -1394,7 +1393,7 @@ prtext (char *beg, char *lim)
 
       while (p < beg)
         {
-          char *nl = memchr (p, eol, beg - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           prline (p, nl, SEP_CHAR_REJECTED);
           p = nl;
@@ -1407,7 +1406,7 @@ prtext (char *beg, char *lim)
       /* One or more lines are output.  */
       for (n = 0; p < lim && n < outleft; n++)
         {
-          char *nl = memchr (p, eol, lim - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           if (!out_quiet)
             prline (p, nl, SEP_CHAR_SELECTED);
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 6f6d4d0..7081060 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -43,14 +43,13 @@ struct kwsearch
   void *re;
 };
 
-/* Compile the -F style PATTERN, containing SIZE bytes.  Return a
-   description of the compiled pattern.  */
+/* Compile the -F style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
 Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
   kwset_t kwset;
-  ptrdiff_t total = size;
   char *buf = NULL;
   size_t bufalloc = 0;
 
@@ -59,23 +58,12 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
   char const *p = pattern;
   do
     {
-      ptrdiff_t len;
-      char const *sep = memchr (p, '\n', total);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-          total -= (len + 1);
-        }
-      else
-        {
-          len = total;
-          total = 0;
-        }
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       if (match_lines)
         {
-          if (eolbyte == '\n' && pattern < p && sep)
+          if (eolbyte == '\n' && pattern < p)
             p--;
           else
             {
@@ -94,9 +82,9 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
         }
       kwsincr (kwset, p, len);
 
-      p = sep;
+      p = sep + 1;
     }
-  while (p);
+  while (p <= pattern + size);
 
   free (buf);
   ptrdiff_t words = kwswords (kwset);
@@ -259,8 +247,14 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                                                kwsearch->size,
                                                RE_SYNTAX_GREP);
                   }
-                end = memchr (beg + len, eol, (buf + size) - (beg + len));
-                end = end ? end + 1 : buf + size;
+                if (beg + len < buf + size)
+                  {
+                    end = rawmemchr (beg + len, eol);
+                    end++;
+                  }
+                else
+                  end = buf + size;
+
                 if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
                     != (size_t) -1)
                   goto success_match_words;
@@ -285,8 +279,13 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
   return -1;
 
  success:
-  end = memchr (beg + len, eol, (buf + size) - (beg + len));
-  end = end ? end + 1 : buf + size;
+  if (beg + len < buf + size)
+    {
+      end = rawmemchr (beg + len, eol);
+      end++;
+    }
+  else
+    end = buf + size;
  success_match_words:
   beg = memrchr (buf, eol, beg - buf);
   beg = beg ? beg + 1 : buf;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 15a6a59..2fcbf8e 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -107,6 +107,9 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
     }
 }
 
+/* Compile the -P style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
+
 void *
 Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
@@ -120,7 +123,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
                          sizeof xprefix - 1 + sizeof xsuffix - 1);
   char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
   int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
-  char const *patlim = pattern + size;
+  char *patlim = pattern + size;
   char *n = re;
   char const *p;
   char const *pnul;
@@ -134,7 +137,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
     }
 
   /* FIXME: Remove this restriction.  */
-  if (memchr (pattern, '\n', size))
+  if (rawmemchr (pattern, '\n') != patlim)
     die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
 
   *n = '\0';
@@ -148,7 +151,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
      replace each NUL byte in the pattern with the four characters
      "\000", removing a preceding backslash if there are an odd
      number of backslashes before the NUL.  */
-  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+  *patlim = '\0';
+  for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
     {
       memcpy (n, p, pnul - p);
       n += pnul - p;
@@ -158,10 +162,10 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
       strcpy (n, "\\000");
       n += 4;
     }
-
-  memcpy (n, p, patlim - p);
+  memcpy (n, p, patlim - p + 1);
   n += patlim - p;
-  *n = '\0';
+  *patlim = '\n';
+
   if (match_words)
     strcpy (n, wsuffix);
   if (match_lines)
@@ -219,7 +223,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
          PCRE_MULTILINE for performance, the performance wasn't always
          better and the correctness issues were too puzzling.  See
          Bug#22655.  */
-      line_end = memchr (p, eolbyte, buf + size - p);
+      line_end = rawmemchr (p, eolbyte);
       if (INT_MAX < line_end - p)
         die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
 

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=71b5c685d0dd3e9b0298e1a9c37b32fbedece340


commit 9393b977015bf7944cec1d71ad3972c101bdb4b8
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Mon Sep 7 19:44:21 2020 -0700

    Prefer rawmemchr to memchr when it’s easy
    
    * bootstrap.conf (gnulib_modules): Add rawmemchr.
    * src/dfasearch.c (GEAcompile, EGexecute):
    * src/grep.c (update_patterns, prpending, prtext):
    * src/kwsearch.c (Fcompile, Fexecute):
    * src/pcresearch.c (Pcompile, Pexecute):
    Simplify (and presumably speed up a little) by using rawmemchr
    with a sentinel, instead of using memchr.

diff --git a/bootstrap.conf b/bootstrap.conf
index fceb318..4268623 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -73,6 +73,7 @@ openat-safer
 perl
 propername
 quote
+rawmemchr
 readme-release
 realloc-gnu
 regex
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 256cd39..4d3f4b2 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -174,6 +174,10 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
   return false;
 }
 
+/* Compile PATTERN, containing SIZE bytes that are followed by '\n'.
+   SYNTAX_BITS specifies whether PATTERN uses style -G, -E, or -A.
+   Return a description of the compiled pattern.  */
+
 void *
 GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 {
@@ -213,15 +217,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 
   do
     {
-      size_t len;
-      char const *sep = memchr (p, '\n', patlim - p);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-        }
-      else
-        len = patlim - p;
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
 
@@ -247,7 +244,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
       if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
         compilation_failed = true;
 
-      p = sep;
+      p = sep + 1;
       lineno++;
 
       if (backref)
@@ -256,12 +253,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
           prev = p;
         }
     }
-  while (p);
+  while (p <= patlim);
 
   if (compilation_failed)
     exit (EXIT_TROUBLE);
 
-  if (prev != NULL)
+  if (prev <= patlim)
     {
       if (pattern < prev)
         {
@@ -383,14 +380,19 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                  greater of the latter two values; this temporarily prefers
                  the DFA to KWset.  */
               exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
-              end = ((exact_kwset_match || !dfafast
-                      || MAX (16, match - beg) < (match - prev_beg) >> 2)
-                     ? match
-                     : MAX (16, match - beg) < (buflim - prev_beg) >> 2
-                     ? prev_beg + 4 * MAX (16, match - beg)
-                     : buflim);
-              end = memchr (end, eol, buflim - end);
-              end = end ? end + 1 : buflim;
+              if (exact_kwset_match || !dfafast
+                  || MAX (16, match - beg) < (match - prev_beg) >> 2)
+                {
+                  end = rawmemchr (match, eol);
+                  end++;
+                }
+              else if (MAX (16, match - beg) < (buflim - prev_beg) >> 2)
+                {
+                  end = rawmemchr (prev_beg + 4 * MAX (16, match - beg), eol);
+                  end++;
+                }
+              else
+                end = buflim;
 
               if (exact_kwset_match)
                 {
@@ -425,8 +427,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                   beg++;
                   dfa_beg = beg;
                 }
-              end = memchr (next_beg, eol, buflim - next_beg);
-              end = end ? end + 1 : buflim;
+              end = rawmemchr (next_beg, eol);
+              end++;
 
               count = 0;
             }
@@ -446,8 +448,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
               beg = memrchr (buf, eol, next_beg - buf);
               beg++;
             }
-          end = memchr (next_beg, eol, buflim - next_beg);
-          end = end ? end + 1 : buflim;
+          end = rawmemchr (next_beg, eol);
+          end++;
 
           /* Successful, no back-references encountered! */
           if (!backref)
diff --git a/src/grep.c b/src/grep.c
index ce2f291..d058a76 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -164,7 +164,7 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
   ptrdiff_t patsize;
   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
     {
-      char const *patend = memchr (src, '\n', srclim - src);
+      char const *patend = rawmemchr (src, '\n');
       patsize = patend + 1 - src;
       memmove (dst, src, patsize);
 
@@ -1104,8 +1104,7 @@ static void
 nlscan (char const *lim)
 {
   size_t newlines = 0;
-  char const *beg;
-  for (beg = lastnl; beg < lim; beg++)
+  for (char const *beg = lastnl; beg < lim; beg++)
     {
       beg = memchr (beg, eolbyte, lim - beg);
       if (!beg)
@@ -1353,7 +1352,7 @@ prpending (char const *lim)
     lastout = bufbeg;
   for (; 0 < pending && lastout < lim; pending--)
     {
-      char *nl = memchr (lastout, eolbyte, lim - lastout);
+      char *nl = rawmemchr (lastout, eolbyte);
       prline (lastout, nl + 1, SEP_CHAR_REJECTED);
     }
 }
@@ -1394,7 +1393,7 @@ prtext (char *beg, char *lim)
 
       while (p < beg)
         {
-          char *nl = memchr (p, eol, beg - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           prline (p, nl, SEP_CHAR_REJECTED);
           p = nl;
@@ -1407,7 +1406,7 @@ prtext (char *beg, char *lim)
       /* One or more lines are output.  */
       for (n = 0; p < lim && n < outleft; n++)
         {
-          char *nl = memchr (p, eol, lim - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           if (!out_quiet)
             prline (p, nl, SEP_CHAR_SELECTED);
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 6f6d4d0..7081060 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -43,14 +43,13 @@ struct kwsearch
   void *re;
 };
 
-/* Compile the -F style PATTERN, containing SIZE bytes.  Return a
-   description of the compiled pattern.  */
+/* Compile the -F style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
 Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
   kwset_t kwset;
-  ptrdiff_t total = size;
   char *buf = NULL;
   size_t bufalloc = 0;
 
@@ -59,23 +58,12 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
   char const *p = pattern;
   do
     {
-      ptrdiff_t len;
-      char const *sep = memchr (p, '\n', total);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-          total -= (len + 1);
-        }
-      else
-        {
-          len = total;
-          total = 0;
-        }
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       if (match_lines)
         {
-          if (eolbyte == '\n' && pattern < p && sep)
+          if (eolbyte == '\n' && pattern < p)
             p--;
           else
             {
@@ -94,9 +82,9 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
         }
       kwsincr (kwset, p, len);
 
-      p = sep;
+      p = sep + 1;
     }
-  while (p);
+  while (p <= pattern + size);
 
   free (buf);
   ptrdiff_t words = kwswords (kwset);
@@ -259,8 +247,14 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                                                kwsearch->size,
                                                RE_SYNTAX_GREP);
                   }
-                end = memchr (beg + len, eol, (buf + size) - (beg + len));
-                end = end ? end + 1 : buf + size;
+                if (beg + len < buf + size)
+                  {
+                    end = rawmemchr (beg + len, eol);
+                    end++;
+                  }
+                else
+                  end = buf + size;
+
                 if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
                     != (size_t) -1)
                   goto success_match_words;
@@ -285,8 +279,13 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
   return -1;
 
  success:
-  end = memchr (beg + len, eol, (buf + size) - (beg + len));
-  end = end ? end + 1 : buf + size;
+  if (beg + len < buf + size)
+    {
+      end = rawmemchr (beg + len, eol);
+      end++;
+    }
+  else
+    end = buf + size;
  success_match_words:
   beg = memrchr (buf, eol, beg - buf);
   beg = beg ? beg + 1 : buf;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 15a6a59..2fcbf8e 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -107,6 +107,9 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
     }
 }
 
+/* Compile the -P style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
+
 void *
 Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
@@ -120,7 +123,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
                          sizeof xprefix - 1 + sizeof xsuffix - 1);
   char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
   int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
-  char const *patlim = pattern + size;
+  char *patlim = pattern + size;
   char *n = re;
   char const *p;
   char const *pnul;
@@ -134,7 +137,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
     }
 
   /* FIXME: Remove this restriction.  */
-  if (memchr (pattern, '\n', size))
+  if (rawmemchr (pattern, '\n') != patlim)
     die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
 
   *n = '\0';
@@ -148,7 +151,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
      replace each NUL byte in the pattern with the four characters
      "\000", removing a preceding backslash if there are an odd
      number of backslashes before the NUL.  */
-  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+  *patlim = '\0';
+  for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
     {
       memcpy (n, p, pnul - p);
       n += pnul - p;
@@ -158,10 +162,10 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
       strcpy (n, "\\000");
       n += 4;
     }
-
-  memcpy (n, p, patlim - p);
+  memcpy (n, p, patlim - p + 1);
   n += patlim - p;
-  *n = '\0';
+  *patlim = '\n';
+
   if (match_words)
     strcpy (n, wsuffix);
   if (match_lines)
@@ -219,7 +223,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
          PCRE_MULTILINE for performance, the performance wasn't always
          better and the correctness issues were too puzzling.  See
          Bug#22655.  */
-      line_end = memchr (p, eolbyte, buf + size - p);
+      line_end = rawmemchr (p, eolbyte);
       if (INT_MAX < line_end - p)
         die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
 

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=33e4602c96e639ec7d56b92ffe3614aa700d3d76


commit 9393b977015bf7944cec1d71ad3972c101bdb4b8
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Mon Sep 7 19:44:21 2020 -0700

    Prefer rawmemchr to memchr when it’s easy
    
    * bootstrap.conf (gnulib_modules): Add rawmemchr.
    * src/dfasearch.c (GEAcompile, EGexecute):
    * src/grep.c (update_patterns, prpending, prtext):
    * src/kwsearch.c (Fcompile, Fexecute):
    * src/pcresearch.c (Pcompile, Pexecute):
    Simplify (and presumably speed up a little) by using rawmemchr
    with a sentinel, instead of using memchr.

diff --git a/bootstrap.conf b/bootstrap.conf
index fceb318..4268623 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -73,6 +73,7 @@ openat-safer
 perl
 propername
 quote
+rawmemchr
 readme-release
 realloc-gnu
 regex
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 256cd39..4d3f4b2 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -174,6 +174,10 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
   return false;
 }
 
+/* Compile PATTERN, containing SIZE bytes that are followed by '\n'.
+   SYNTAX_BITS specifies whether PATTERN uses style -G, -E, or -A.
+   Return a description of the compiled pattern.  */
+
 void *
 GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 {
@@ -213,15 +217,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
 
   do
     {
-      size_t len;
-      char const *sep = memchr (p, '\n', patlim - p);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-        }
-      else
-        len = patlim - p;
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
 
@@ -247,7 +244,7 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
       if (!regex_compile (dc, p, len, dc->pcount, lineno, !backref))
         compilation_failed = true;
 
-      p = sep;
+      p = sep + 1;
       lineno++;
 
       if (backref)
@@ -256,12 +253,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits)
           prev = p;
         }
     }
-  while (p);
+  while (p <= patlim);
 
   if (compilation_failed)
     exit (EXIT_TROUBLE);
 
-  if (prev != NULL)
+  if (prev <= patlim)
     {
       if (pattern < prev)
         {
@@ -383,14 +380,19 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                  greater of the latter two values; this temporarily prefers
                  the DFA to KWset.  */
               exact_kwset_match = kwsm.index < dc->kwset_exact_matches;
-              end = ((exact_kwset_match || !dfafast
-                      || MAX (16, match - beg) < (match - prev_beg) >> 2)
-                     ? match
-                     : MAX (16, match - beg) < (buflim - prev_beg) >> 2
-                     ? prev_beg + 4 * MAX (16, match - beg)
-                     : buflim);
-              end = memchr (end, eol, buflim - end);
-              end = end ? end + 1 : buflim;
+              if (exact_kwset_match || !dfafast
+                  || MAX (16, match - beg) < (match - prev_beg) >> 2)
+                {
+                  end = rawmemchr (match, eol);
+                  end++;
+                }
+              else if (MAX (16, match - beg) < (buflim - prev_beg) >> 2)
+                {
+                  end = rawmemchr (prev_beg + 4 * MAX (16, match - beg), eol);
+                  end++;
+                }
+              else
+                end = buflim;
 
               if (exact_kwset_match)
                 {
@@ -425,8 +427,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
                   beg++;
                   dfa_beg = beg;
                 }
-              end = memchr (next_beg, eol, buflim - next_beg);
-              end = end ? end + 1 : buflim;
+              end = rawmemchr (next_beg, eol);
+              end++;
 
               count = 0;
             }
@@ -446,8 +448,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
               beg = memrchr (buf, eol, next_beg - buf);
               beg++;
             }
-          end = memchr (next_beg, eol, buflim - next_beg);
-          end = end ? end + 1 : buflim;
+          end = rawmemchr (next_beg, eol);
+          end++;
 
           /* Successful, no back-references encountered! */
           if (!backref)
diff --git a/src/grep.c b/src/grep.c
index ce2f291..d058a76 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -164,7 +164,7 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
   ptrdiff_t patsize;
   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
     {
-      char const *patend = memchr (src, '\n', srclim - src);
+      char const *patend = rawmemchr (src, '\n');
       patsize = patend + 1 - src;
       memmove (dst, src, patsize);
 
@@ -1104,8 +1104,7 @@ static void
 nlscan (char const *lim)
 {
   size_t newlines = 0;
-  char const *beg;
-  for (beg = lastnl; beg < lim; beg++)
+  for (char const *beg = lastnl; beg < lim; beg++)
     {
       beg = memchr (beg, eolbyte, lim - beg);
       if (!beg)
@@ -1353,7 +1352,7 @@ prpending (char const *lim)
     lastout = bufbeg;
   for (; 0 < pending && lastout < lim; pending--)
     {
-      char *nl = memchr (lastout, eolbyte, lim - lastout);
+      char *nl = rawmemchr (lastout, eolbyte);
       prline (lastout, nl + 1, SEP_CHAR_REJECTED);
     }
 }
@@ -1394,7 +1393,7 @@ prtext (char *beg, char *lim)
 
       while (p < beg)
         {
-          char *nl = memchr (p, eol, beg - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           prline (p, nl, SEP_CHAR_REJECTED);
           p = nl;
@@ -1407,7 +1406,7 @@ prtext (char *beg, char *lim)
       /* One or more lines are output.  */
       for (n = 0; p < lim && n < outleft; n++)
         {
-          char *nl = memchr (p, eol, lim - p);
+          char *nl = rawmemchr (p, eol);
           nl++;
           if (!out_quiet)
             prline (p, nl, SEP_CHAR_SELECTED);
diff --git a/src/kwsearch.c b/src/kwsearch.c
index 6f6d4d0..7081060 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -43,14 +43,13 @@ struct kwsearch
   void *re;
 };
 
-/* Compile the -F style PATTERN, containing SIZE bytes.  Return a
-   description of the compiled pattern.  */
+/* Compile the -F style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
 Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
   kwset_t kwset;
-  ptrdiff_t total = size;
   char *buf = NULL;
   size_t bufalloc = 0;
 
@@ -59,23 +58,12 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
   char const *p = pattern;
   do
     {
-      ptrdiff_t len;
-      char const *sep = memchr (p, '\n', total);
-      if (sep)
-        {
-          len = sep - p;
-          sep++;
-          total -= (len + 1);
-        }
-      else
-        {
-          len = total;
-          total = 0;
-        }
+      char const *sep = rawmemchr (p, '\n');
+      ptrdiff_t len = sep - p;
 
       if (match_lines)
         {
-          if (eolbyte == '\n' && pattern < p && sep)
+          if (eolbyte == '\n' && pattern < p)
             p--;
           else
             {
@@ -94,9 +82,9 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored)
         }
       kwsincr (kwset, p, len);
 
-      p = sep;
+      p = sep + 1;
     }
-  while (p);
+  while (p <= pattern + size);
 
   free (buf);
   ptrdiff_t words = kwswords (kwset);
@@ -259,8 +247,14 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                                                kwsearch->size,
                                                RE_SYNTAX_GREP);
                   }
-                end = memchr (beg + len, eol, (buf + size) - (beg + len));
-                end = end ? end + 1 : buf + size;
+                if (beg + len < buf + size)
+                  {
+                    end = rawmemchr (beg + len, eol);
+                    end++;
+                  }
+                else
+                  end = buf + size;
+
                 if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
                     != (size_t) -1)
                   goto success_match_words;
@@ -285,8 +279,13 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
   return -1;
 
  success:
-  end = memchr (beg + len, eol, (buf + size) - (beg + len));
-  end = end ? end + 1 : buf + size;
+  if (beg + len < buf + size)
+    {
+      end = rawmemchr (beg + len, eol);
+      end++;
+    }
+  else
+    end = buf + size;
  success_match_words:
   beg = memrchr (buf, eol, beg - buf);
   beg = beg ? beg + 1 : buf;
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 15a6a59..2fcbf8e 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -107,6 +107,9 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
     }
 }
 
+/* Compile the -P style PATTERN, containing SIZE bytes that are
+   followed by '\n'.  Return a description of the compiled pattern.  */
+
 void *
 Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
 {
@@ -120,7 +123,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
                          sizeof xprefix - 1 + sizeof xsuffix - 1);
   char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
   int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
-  char const *patlim = pattern + size;
+  char *patlim = pattern + size;
   char *n = re;
   char const *p;
   char const *pnul;
@@ -134,7 +137,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
     }
 
   /* FIXME: Remove this restriction.  */
-  if (memchr (pattern, '\n', size))
+  if (rawmemchr (pattern, '\n') != patlim)
     die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
 
   *n = '\0';
@@ -148,7 +151,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
      replace each NUL byte in the pattern with the four characters
      "\000", removing a preceding backslash if there are an odd
      number of backslashes before the NUL.  */
-  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+  *patlim = '\0';
+  for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
     {
       memcpy (n, p, pnul - p);
       n += pnul - p;
@@ -158,10 +162,10 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored)
       strcpy (n, "\\000");
       n += 4;
     }
-
-  memcpy (n, p, patlim - p);
+  memcpy (n, p, patlim - p + 1);
   n += patlim - p;
-  *n = '\0';
+  *patlim = '\n';
+
   if (match_words)
     strcpy (n, wsuffix);
   if (match_lines)
@@ -219,7 +223,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
          PCRE_MULTILINE for performance, the performance wasn't always
          better and the correctness issues were too puzzling.  See
          Bug#22655.  */
-      line_end = memchr (p, eolbyte, buf + size - p);
+      line_end = rawmemchr (p, eolbyte);
       if (INT_MAX < line_end - p)
         die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
 

-----------------------------------------------------------------------

Summary of changes:
 NEWS                     |   4 +
 bootstrap.conf           |   2 +
 src/dfasearch.c          |  54 ++++-----
 src/grep.c               | 285 ++++++++++++++++++++++++++++++-----------------
 src/kwsearch.c           |  45 ++++----
 src/pcresearch.c         |  18 +--
 tests/filename-lineno.pl |  19 +++-
 7 files changed, 263 insertions(+), 164 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]